def elu_layer(layer_config, bottom_name):
    """For an ELU layer, top = bottom (Caffe feature) when the layer runs in place."""
    return L.ELU(name=layer_config['name'],
                 bottom=bottom_name,
                 ntop=0,
                 top=bottom_name if layer_config['inplace'] else layer_config['name'],
                 alpha=layer_config['alpha'],
                 in_place=layer_config['inplace'])
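# A minimal usage sketch for elu_layer (hypothetical values, not from the source):
# the layer_config keys below ('name', 'alpha', 'inplace') are exactly the fields
# the function reads, and bottom_name is the name of the preceding blob.
example_config = {'name': 'elu1', 'alpha': 1.0, 'inplace': True}
elu_top = elu_layer(example_config, bottom_name='conv1')  # top reuses 'conv1' because inplace=True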
def test_elu():
    # type: () -> caffe.NetSpec
    n = caffe.NetSpec()
    n.input1 = L.Input(shape=make_shape([10, 4, 64, 64]))
    n.elu1 = L.ELU(n.input1, alpha=1.0)
    return n
def apply_activation(layer, bottom):
    activation = keras.activations.serialize(layer.activation)
    if activation == 'relu':
        return L.ReLU(bottom, in_place=True)
    elif activation == 'softmax':
        # Cannot extract the axis from the model, so default to -1
        return L.Softmax(bottom)
    elif activation == 'softsign':
        # Needs to be implemented in caffe2dml
        raise Exception("softsign is not implemented")
    elif activation == 'elu':
        return L.ELU(bottom)
    elif activation == 'selu':
        # Needs to be implemented in caffe2dml
        raise Exception("SELU activation is not implemented")
    elif activation == 'sigmoid':
        return L.Sigmoid(bottom)
    elif activation == 'tanh':
        return L.TanH(bottom)
def compile_time_operation(self, learning_option, cluster):
    """
    Define the exponential linear unit (ELU) operation for the input tensor.
    Computes: alpha * (exp(features) - 1) if features < 0, features otherwise.
    """
    # get input
    input_ = self.get_input('input')
    indim = self.get_dimension('input')

    # get attr (optional field)
    slope = float(self.get_attr('slope', default=1.0))

    elu = L.ELU(input_, name=self.name, alpha=slope)

    # set output and its dimension (same as the input dimension)
    outdim = indim
    self.set_output('output', elu)
    self.set_dimension('output', outdim)
def generate_layer(blobs, layer, n, net_params):
    """
    Parameters:
        blobs: weights from Keras
        layer: Keras layer
        n: Caffe NetSpec
        net_params: dictionary to store Caffe weights
    """
    if type(layer) == keras.layers.InputLayer:
        name = layer.name
        input_shape = list(layer.batch_input_shape)
        # NHWC -> NCHW, with batch size forced to 1
        input_shape = [1, input_shape[3], input_shape[1], input_shape[2]]
        n[name] = L.Input(shape=[dict(dim=input_shape)])
        print(f'generate {name} ok...')

    elif type(layer) == keras.layers.Dense:
        name = layer.name
        config = layer.get_config()
        use_bias = config['use_bias']
        if use_bias is None:
            use_bias = False
        if use_bias:
            net_params[name] = (np.array(blobs[0]).transpose(1, 0), np.array(blobs[1]))
        else:
            net_params[name] = (blobs[0])
        in_nodes = get_input_nodes(layer)
        n[name] = L.InnerProduct(n[in_nodes[0].name], num_output=layer.units, bias_term=use_bias)
        if layer.activation is not None and layer.activation.__name__ != 'linear':
            # get function string
            name_act = name + "_activation_" + layer.activation.__name__
            n[name_act] = apply_activation(layer, n[name])
        print(f'generate {name} ok...')

    elif type(layer) == keras.layers.Flatten:
        raise Exception(f"{layer.name} is not implemented")

    elif type(layer) == keras.layers.Dropout:
        name = layer.name
        in_nodes = get_input_nodes(layer)
        n[name] = L.Dropout(n[in_nodes[0].name], dropout_ratio=layer.rate, in_place=True)
        print(f'generate {name} ok...')

    elif type(layer) == keras.layers.Add:
        name = layer.name
        in_nodes = get_input_nodes(layer)
        network_layers = []
        for ref in in_nodes:
            network_layers.append(n[ref.name])
        n[name] = L.Eltwise(*network_layers, operation=1)  # 1 is SUM
        print(f'generate {name} ok...')

    elif type(layer) == keras.layers.Multiply:
        name = layer.name
        in_nodes = get_input_nodes(layer)
        network_layers = []
        for ref in in_nodes:
            network_layers.append(n[ref.name])
        n[name] = L.Eltwise(*network_layers, operation=0)  # 0 is PROD
        print(f'generate {name} ok...')

    elif type(layer) == keras.layers.Concatenate:
        name = layer.name
        in_nodes = get_input_nodes(layer)
        network_layers = []
        for ref in in_nodes:
            network_layers.append(n[ref.name])
        n[name] = L.Concat(*network_layers, axis=1)
        print(f'generate {name} ok...')

    elif type(layer) == keras.layers.Maximum:
        name = layer.name
        in_nodes = get_input_nodes(layer)
        network_layers = []
        for ref in in_nodes:
            network_layers.append(n[ref.name])
        n[name] = L.Eltwise(*network_layers, operation=2)  # 2 is MAX
        print(f'generate {name} ok...')

    elif type(layer) == keras.layers.Conv2DTranspose:
        '''
        DeconvolutionLayer:
            output = (input - 1) * stride + kernel_size - 2 * pad
            kernel_size: {{2 * factor - factor % 2}}
            stride: {{factor}}
            num_output: {{C}}
            group: {{C}}
            pad: {{ceil((factor - 1) / 2.)}}
        '''
        name = layer.name
        in_nodes = get_input_nodes(layer)
        # Stride
        if layer.strides is None:
            stride = (1, 1)
        else:
            stride = layer.strides
        # if layer.padding == 'same':
        #     # Calculate the padding for 'same'
        #     padding = [layer.kernel_size[0] // 2, layer.kernel_size[1] // 2]
        # else:
        #     padding = [0, 0]  # If padding is 'valid' (aka no padding)
        config = layer.get_config()
        use_bias = config['use_bias']
        if use_bias is None:
            use_bias = False
        n[name] = L.Deconvolution(n[in_nodes[0].name],
                                  convolution_param=dict(
                                      kernel_h=layer.kernel_size[0],
                                      kernel_w=layer.kernel_size[1],
                                      stride_h=stride[0],
                                      stride_w=stride[1],
                                      num_output=layer.filters,
                                      pad_h=math.ceil((stride[0] - 1) / 2.),
                                      pad_w=math.ceil((stride[1] - 1) / 2.),
                                      bias_term=use_bias))
        blobs[0] = np.array(blobs[0]).transpose(3, 2, 0, 1)
        net_params[name] = blobs
        if layer.activation is not None and layer.activation.__name__ != 'linear':
            # get function string
            name_act = name + "_activation_" + layer.activation.__name__
            n[name_act] = apply_activation(layer, n[name])
        print(f'generate {name} ok...')

    elif type(layer) == keras.layers.BatchNormalization:
        name = layer.name
        in_nodes = get_input_nodes(layer)
        n[name] = L.BatchNorm(n[in_nodes[0].name],
                              moving_average_fraction=layer.momentum,
                              eps=layer.epsilon,
                              use_global_stats=True)
        variance = np.array(blobs[-1])
        mean = np.array(blobs[-2])
        config = layer.get_config()
        param = dict()
        if config['scale']:
            gamma = np.array(blobs[0])
        else:
            gamma = np.ones(mean.shape, dtype=np.float32)
        if config['center']:
            beta = np.array(blobs[1])
            param['bias_term'] = True
        else:
            beta = np.zeros(mean.shape, dtype=np.float32)
            param['bias_term'] = False
        net_params[name] = (mean, variance, np.array([1.0]))
        # Scale after BatchNorm
        name_scale = name + '_scale'
        n[name_scale] = L.Scale(n[name], in_place=True, scale_param=param)
        net_params[name_scale] = (gamma, beta)
        print(f'generate {name} ok...')

    # TODO Needs to be implemented
    elif type(layer) == keras.layers.Conv1D:
        raise Exception(f"{layer.name} is not implemented")

    elif type(layer) == keras.layers.ZeroPadding2D:
        print(f"{layer.name} is passed...")

    elif type(layer) == keras.layers.Conv2D:
        '''
        ConvolutionLayer:
            output = (input + 2 * pad - kernel_size) / stride + 1
            kernel_shape: [out, in, k_size_h, k_size_w]
        '''
        name = layer.name
        # Padding
        if layer.padding == 'same':
            # Calculate the padding for 'same'
            padding = [layer.kernel_size[0] // 2, layer.kernel_size[1] // 2]
        else:
            padding = [0, 0]  # If padding is 'valid' (aka no padding)
        in_nodes = get_input_nodes(layer)
        if type(in_nodes[0]) == keras.layers.ZeroPadding2D:
            in_nodes = get_input_nodes(in_nodes[0])
            padding = [layer.kernel_size[0] // 2, layer.kernel_size[1] // 2]
        if layer.strides is None:
            stride = (1, 1)
        else:
            stride = layer.strides
        # TODO: the rest of the arguments, including bias, regularizers, dilation
        config = layer.get_config()
        # get bias parameter
        use_bias = config['use_bias']
        if use_bias is None:
            use_bias = False
        n[name] = L.Convolution(n[in_nodes[0].name],
                                kernel_h=layer.kernel_size[0],
                                kernel_w=layer.kernel_size[1],
                                stride_h=stride[0],
                                stride_w=stride[1],
                                num_output=layer.filters,
                                pad_h=padding[0],
                                pad_w=padding[1],
                                bias_term=use_bias)
        blobs[0] = np.array(blobs[0]).transpose((3, 2, 0, 1))
        net_params[name] = blobs
        if layer.activation is not None and layer.activation.__name__ != 'linear':
            # get function string
            name_act = name + "_activation_" + layer.activation.__name__
            n[name_act] = apply_activation(layer, n[name])
        print(f'generate {name} ok...')

    elif type(layer) == keras.layers.MaxPooling2D or type(layer) == keras.layers.AveragePooling2D:
        name = layer.name
        in_nodes = get_input_nodes(layer)
        if type(layer) == keras.layers.MaxPooling2D:
            pool = P.Pooling.MAX
        else:
            # NOTE AveragePooling needs to be implemented
            pool = P.Pooling.AVE
        # TODO: the rest of the arguments, including bias, regularizers, dilation
        if layer.strides is None:
            stride = (1, 1)
        else:
            stride = layer.strides
        # Padding
        if layer.padding == 'same':
            # Calculate the padding for 'same'
            padding = [layer.pool_size[0] // 2, layer.pool_size[1] // 2]
        else:
            padding = [0, 0]  # If padding is 'valid' (aka no padding)
        n[name] = L.Pooling(n[in_nodes[0].name],
                            kernel_h=layer.pool_size[0],
                            kernel_w=layer.pool_size[1],
                            stride_h=stride[0],
                            stride_w=stride[1],
                            pad_h=padding[0],
                            pad_w=padding[1],
                            pool=pool)
        print(f'generate {name} ok...')

    # Activation (wrapper for activations) and Advanced Activation Layers
    elif type(layer) == keras.layers.Activation:
        name = layer.name
        in_nodes = get_input_nodes(layer)
        n[name] = apply_activation(layer, n[in_nodes[0].name])  # TODO: Assert only 1
        print(f'generate {name} ok...')

    # Caffe lacks initializer, regularizer, and constraint params
    elif type(layer) == keras.layers.LeakyReLU:
        # TODO: figure out how to pass Leaky params
        name = layer.name
        in_nodes = get_input_nodes(layer)
        n[name] = L.PReLU(n[in_nodes[0].name])
        print(f'generate {name} ok...')

    elif type(layer) == keras.layers.PReLU:
        name = layer.name
        in_nodes = get_input_nodes(layer)
        n[name] = L.PReLU(n[in_nodes[0].name])
        print(f'generate {name} ok...')

    elif type(layer) == keras.layers.ELU:
        name = layer.name
        in_nodes = get_input_nodes(layer)
        n[name] = L.ELU(n[in_nodes[0].name], alpha=layer.alpha)
        print(f'generate {name} ok...')

    elif type(layer) == keras.layers.GlobalAveragePooling2D:
        name = layer.name
        in_nodes = get_input_nodes(layer)
        # GlobalAveragePooling2D has no kernel_size/strides attributes, so map it to
        # Caffe's global average pooling over the whole feature map.
        n[name] = L.Pooling(n[in_nodes[0].name], pool=P.Pooling.AVE, global_pooling=True)
        print(f'generate {name} ok...')

    else:
        raise Exception("Cannot convert model. " + layer.name + " is not supported.")
def test_elu2(self):
    n = caffe.NetSpec()
    n.input1 = L.Input(shape=make_shape([10, 4, 64, 64]))
    n.elu1 = L.ELU(n.input1, alpha=2.0)
    self._test_model(*self._netspec_to_model(n, 'elu2'))
def generate_layer(blobs, layer, n, net_params):
    """
    Parameters:
        blobs: weights from Keras
        layer: Keras layer
        n: Caffe NetSpec
        net_params: dictionary to store Caffe weights
    """
    if type(layer) == keras.layers.InputLayer:
        # Grab the batch size from index 0, shift channels over to index 1,
        # and place the remaining dimensions afterwards.
        # TODO determine when to transform for layer types/input shape
        num = len(layer.batch_input_shape) - 1  # Range from 1st index to second last
        # TODO check for image_data_format to be channels_first or channels_last
        batch_list = [layer.batch_input_shape[0], layer.batch_input_shape[-1]]
        for i in range(1, num):
            batch_list.append(layer.batch_input_shape[i])
        for i in range(len(batch_list)):
            # Replace None dimensions with 1 for Caffe
            if batch_list[i] is None:
                batch_list[i] = 1
        name = layer.name
        # TODO figure out having 2 tops, with n.label
        n[name] = L.Input(shape=[dict(dim=batch_list)])

    elif type(layer) == keras.layers.Dense:
        # Pull the name from Keras
        name = layer.name
        # Pull the name of the layer feeding the current layer
        in_names = get_inbound_layers(layer)
        # Pipe names into Caffe using unique Keras layer names
        n[name] = L.InnerProduct(n[in_names[0].name], num_output=layer.units)  # TODO: Assert only 1
        config = layer.get_config()
        if config['use_bias']:
            net_params[name] = (np.array(blobs[0]).transpose(1, 0), np.array(blobs[1]))
        else:
            net_params[name] = (blobs[0])
        if layer.activation is not None and layer.activation.__name__ != 'linear':
            # get function string
            name_act = name + "_activation_" + layer.activation.__name__
            n[name_act] = get_activation(layer, n[name])

    elif type(layer) == keras.layers.Flatten:
        """
        Caffe2DML implicitly stores all tensors as 1D arrays with shapes, so after
        every pass-through all outputs are already flattened. Thus we can ignore
        Flatten layers and simply pass the tops and bottoms across them.
        """

    elif type(layer) == keras.layers.Dropout:
        # TODO Random seed will be lost
        name = layer.name
        in_names = get_inbound_layers(layer)
        n[name] = L.Dropout(n[in_names[0].name], dropout_ratio=layer.rate, in_place=True)

    # elif type(layer) == keras.layers.LSTM:

    elif type(layer) == keras.layers.Add:
        name = layer.name
        in_names = get_inbound_layers(layer)
        # turn the list of names into network layers
        network_layers = []
        for ref in in_names:
            network_layers.append(n[ref.name])
        # unpack the bottom layers
        n[name] = L.Eltwise(*network_layers, operation=1)  # 1 is SUM

    elif type(layer) == keras.layers.Multiply:
        name = layer.name
        in_names = get_inbound_layers(layer)
        # turn the list of names into network layers
        network_layers = []
        for ref in in_names:
            network_layers.append(n[ref.name])
        # unpack the bottom layers
        n[name] = L.Eltwise(*network_layers, operation=0)  # 0 is PROD

    elif type(layer) == keras.layers.Concatenate:
        name = layer.name
        in_names = get_inbound_layers(layer)
        # turn the list of names into network layers
        network_layers = []
        for ref in in_names:
            network_layers.append(n[ref.name])
        axis = get_compensated_axis(layer)
        n[name] = L.Concat(*network_layers, axis=1)

    elif type(layer) == keras.layers.Maximum:
        name = layer.name
        in_names = get_inbound_layers(layer)
        # turn the list of names into network layers
        network_layers = []
        for ref in in_names:
            network_layers.append(n[ref.name])
        # unpack the bottom layers
        n[name] = L.Eltwise(*network_layers, operation=2)  # 2 is MAX

    elif type(layer) == keras.layers.Conv2DTranspose:
        name = layer.name
        in_names = get_inbound_layers(layer)
        # Stride
        if layer.strides is None:
            stride = (1, 1)
        else:
            stride = layer.strides
        # Padding
        if layer.padding == 'same':
            # Calculate the padding for 'same'
            padding = [layer.kernel_size[0] // 2, layer.kernel_size[1] // 2]
        else:
            padding = [0, 0]  # If padding is 'valid' (aka no padding)
        # get bias parameter
        config = layer.get_config()
        use_bias = config['use_bias']
        param = dict(bias_term=use_bias)
        n[name] = L.Deconvolution(n[in_names[0].name],
                                  kernel_h=layer.kernel_size[0],
                                  kernel_w=layer.kernel_size[1],
                                  stride_h=stride[0],
                                  stride_w=stride[1],
                                  num_output=layer.filters,
                                  pad_h=padding[0],
                                  pad_w=padding[1],
                                  convolution_param=param)
        blobs[0] = np.array(blobs[0]).transpose(3, 2, 0, 1)
        net_params[name] = blobs
        if layer.activation is not None and layer.activation.__name__ != 'linear':
            # get function string
            name_act = name + "_activation_" + layer.activation.__name__
            n[name_act] = get_activation(layer, n[name])

    elif type(layer) == keras.layers.BatchNormalization:
        name = layer.name
        in_names = get_inbound_layers(layer)
        n[name] = L.BatchNorm(n[in_names[0].name],
                              moving_average_fraction=layer.momentum,
                              eps=layer.epsilon)
        variance = np.array(blobs[-1])
        mean = np.array(blobs[-2])
        config = layer.get_config()
        # Set mean, variance, and gamma into their respective params
        param = dict()
        if config['scale']:
            gamma = np.array(blobs[0])
        else:
            gamma = np.ones(mean.shape, dtype=np.float32)
        if config['center']:
            beta = np.array(blobs[1])
            param['bias_term'] = True
        else:
            beta = np.zeros(mean.shape, dtype=np.float32)
            param['bias_term'] = False
        net_params[name] = (mean, variance, np.array(1.0))
        # Scale after BatchNorm
        name_scale = name + '_scale'
        n[name_scale] = L.Scale(n[name], in_place=True, scale_param=param)
        net_params[name_scale] = (gamma, beta)

    # TODO Needs to be implemented
    elif type(layer) == keras.layers.Conv1D:
        name = layer.name
        in_names = get_inbound_layers(layer)
        n[name] = L.Convolution(n[in_names[0].name])

    elif type(layer) == keras.layers.Conv2D:
        name = layer.name
        in_names = get_inbound_layers(layer)
        # Stride
        if layer.strides is None:
            stride = (1, 1)
        else:
            stride = layer.strides
        # Padding
        if layer.padding == 'same':
            # Calculate the padding for 'same'
            padding = [layer.kernel_size[0] // 2, layer.kernel_size[1] // 2]
        else:
            padding = [0, 0]  # If padding is 'valid' (aka no padding)
        # TODO: the rest of the arguments, including bias, regularizers, dilation
        config = layer.get_config()
        # get bias parameter
        use_bias = config['use_bias']
        param = dict(bias_term=use_bias)
        n[name] = L.Convolution(n[in_names[0].name],
                                kernel_h=layer.kernel_size[0],
                                kernel_w=layer.kernel_size[1],
                                stride_h=stride[0],
                                stride_w=stride[1],
                                num_output=layer.filters,
                                pad_h=padding[0],
                                pad_w=padding[1],
                                convolution_param=param)
        blobs[0] = np.array(blobs[0]).transpose((3, 2, 0, 1))
        net_params[name] = blobs
        if layer.activation is not None and layer.activation.__name__ != 'linear':
            # get function string
            name_act = name + "_activation_" + layer.activation.__name__
            n[name_act] = get_activation(layer, n[name])

    elif type(layer) == keras.layers.MaxPooling2D or type(layer) == keras.layers.AveragePooling2D:
        name = layer.name
        in_names = get_inbound_layers(layer)
        if type(layer) == keras.layers.MaxPooling2D:
            pool = P.Pooling.MAX
        else:
            # NOTE AveragePooling needs to be implemented
            pool = P.Pooling.AVE
        # TODO: the rest of the arguments, including bias, regularizers, dilation
        if layer.strides is None:
            stride = (1, 1)
        else:
            stride = layer.strides
        # Padding
        if layer.padding == 'same':
            # Calculate the padding for 'same'
            padding = [layer.pool_size[0] // 2, layer.pool_size[1] // 2]
        else:
            padding = [0, 0]  # If padding is 'valid' (aka no padding)
        n[name] = L.Pooling(n[in_names[0].name],
                            kernel_h=layer.pool_size[0],
                            kernel_w=layer.pool_size[1],
                            stride_h=stride[0],
                            stride_w=stride[1],
                            pad_h=padding[0],
                            pad_w=padding[1],
                            pool=pool)
        # if hasattr(layer, 'activation'):
        #     name_act = name + "_activation_" + layer.activation.__name__  # get function string
        #     n[name_act] = get_activation(layer, n[name])

    # Activation (wrapper for activations) and Advanced Activation Layers
    elif type(layer) == keras.layers.Activation:
        name = layer.name
        in_names = get_inbound_layers(layer)
        n[name] = get_activation(layer, n[in_names[0].name])  # TODO: Assert only 1

    # Caffe lacks initializer, regularizer, and constraint params
    elif type(layer) == keras.layers.LeakyReLU:
        # TODO: figure out how to pass Leaky params
        name = layer.name
        in_names = get_inbound_layers(layer)
        n[name] = L.PReLU(n[in_names[0].name])

    elif type(layer) == keras.layers.PReLU:
        name = layer.name
        in_names = get_inbound_layers(layer)
        n[name] = L.PReLU(n[in_names[0].name])

    elif type(layer) == keras.layers.ELU:
        name = layer.name
        in_names = get_inbound_layers(layer)
        n[name] = L.ELU(n[in_names[0].name], alpha=layer.alpha)

    elif type(layer) == keras.layers.GlobalAveragePooling2D:
        name = layer.name
        in_names = get_inbound_layers(layer)
        # NOTE: kernel and stride are hard-coded for an 8x8 input feature map
        n[name] = L.Pooling(n[in_names[0].name],
                            kernel_size=8,
                            stride=8,
                            pad=0,
                            pool=P.Pooling.AVE)

    elif type(layer) == keras.layers.ZeroPadding2D:
        name = layer.name
        in_names = get_inbound_layers(layer)
        config = layer.get_config()
        padding = config['padding']
        # Approximate zero padding with a 1x1 convolution carrying fixed weights
        n[name] = L.Convolution(n[in_names[0].name],
                                num_output=3,
                                kernel_size=1,
                                stride=1,
                                pad_h=padding[0][0],
                                pad_w=padding[1][0],
                                convolution_param=dict(bias_term=False))
        net_params[name] = np.ones((3, 3, 1, 1))

    else:
        raise Exception("Cannot convert model. " + layer.name + " is not supported.")
def net():
    n = caffe.NetSpec()
    n.data = L.Input(input_param=dict(shape=dict(dim=data_shape)))
    n.dataout = L.ELU(n.data, alpha=_alpha)
    return n.to_proto()
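# A minimal usage sketch (assumption: data_shape and _alpha are module-level values
# referenced by net() above; the example values here are hypothetical). It serializes
# the generated NetSpec to a prototxt file that Caffe can load.
if __name__ == '__main__':
    data_shape = [1, 3, 224, 224]  # hypothetical input shape
    _alpha = 1.0                   # hypothetical ELU alpha
    with open('elu_net.prototxt', 'w') as f:
        f.write(str(net()))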