def output_layer(x_in):
    # embd_shape, w_decay, name and _regularizer are assumed to be defined in an
    # enclosing scope (this helper is typically returned from a factory function).
    x = inputs = Input(x_in.shape[1:])
    x = BatchNormalization()(x)
    x = Dropout(rate=0.5)(x)
    x = Flatten()(x)
    x = Dense(embd_shape, kernel_regularizer=_regularizer(w_decay))(x)
    x = BatchNormalization()(x)
    return Model(inputs, x, name=name)(x_in)
def build_decoder(self, optimizer, loss_function):
    decoder = NeuralNetwork(optimizer=optimizer, loss=loss_function)
    decoder.add(Dense(256, input_shape=(self.latent_dim,)))
    decoder.add(Activation('leaky_relu'))
    decoder.add(BatchNormalization(momentum=0.8))
    decoder.add(Dense(512))
    decoder.add(Activation('leaky_relu'))
    decoder.add(BatchNormalization(momentum=0.8))
    decoder.add(Dense(self.img_dim))
    decoder.add(Activation('tanh'))
    return decoder
def bn_3_layer_test(epochs=2, reg=0.0, lr=0.01, momentum=0.7):
    trainingData, trainingLabels, \
        validationData, validationLabels, \
        testingData, testingLabels = loadAllData("Datasets/cifar-10-batches-mat/", valsplit=0.20)

    timestamp = datetime.now().strftime('%Y-%b-%d--%H-%M-%S')

    network = Model(name="NO BN")
    network.addLayer(Linear(32*32*3, 50, regularization=reg, initializer="he"))
    #network.addLayer(BatchNormalization(50, trainable=True))
    network.addLayer(Relu())
    network.addLayer(Linear(50, 30, regularization=reg, initializer="he"))
    #network.addLayer(BatchNormalization(30, trainable=True))
    network.addLayer(Relu())
    network.addLayer(Linear(30, 10, regularization=reg, initializer="he"))
    network.addLayer(Softmax())

    sgd = SGD(lr=lr, lr_decay=1.00, momentum=momentum, shuffle=True, lr_min=1e-5)
    network.compile(sgd, "cce")
    network.fit(trainingData, trainingLabels, epochs=epochs, batch_size=64,
                validationData=(validationData, validationLabels))

    loss, acc = network.evaluate(testingData, testingLabels)
    print("NO BN: Test loss: {} , Test acc: {}".format(loss, acc))

    networkBN = Model(name="WITH BN")
    networkBN.addLayer(Linear(32*32*3, 50, regularization=reg, initializer="he"))
    networkBN.addLayer(BatchNormalization(50, trainable=True, alpha=0.99))
    networkBN.addLayer(Relu())
    networkBN.addLayer(Linear(50, 30, regularization=reg, initializer="he"))
    networkBN.addLayer(BatchNormalization(30, trainable=True, alpha=0.99))
    networkBN.addLayer(Relu())
    networkBN.addLayer(Linear(30, 10, regularization=reg, initializer="he"))
    networkBN.addLayer(Softmax())

    sgd2 = SGD(lr=lr, lr_decay=1.00, momentum=momentum, shuffle=True, lr_min=1e-5)
    networkBN.compile(sgd2, "cce")
    networkBN.fit(trainingData, trainingLabels, epochs=epochs, batch_size=64,
                  validationData=(validationData, validationLabels))

    #plotAccuracy(network, "plots/", timestamp)
    #plotLoss(network, "plots/", timestamp)

    loss, acc = networkBN.evaluate(testingData, testingLabels)
    print("W BN: Test loss: {} , Test acc: {}".format(loss, acc))

    multiPlotLoss((network, networkBN), "plots/", timestamp,
                  title="3-layer network loss over epochs, eta:{}, lambda:{}".format(lr, reg))
    multiPlotAccuracy((network, networkBN), "plots/", timestamp,
                      title="3-layer network accuracy over epochs, eta:{}, lambda:{}".format(lr, reg))
def SeparableConvBlock(num_channels, kernel_size, strides, name, freeze_bn=False):
    """
    Builds a small block consisting of a depthwise separable convolution layer and a batch norm layer

    Args:
        num_channels: Number of channels used in the BiFPN
        kernel_size: Kernel size of the depthwise separable convolution layer
        strides: Stride of the depthwise separable convolution layer
        name: Name of the block
        freeze_bn: Boolean indicating if the batch norm layers should be frozen during training or not.

    Returns:
        The depthwise separable convolution block
    """
    f1 = layers.SeparableConv2D(num_channels, kernel_size=kernel_size, strides=strides,
                                padding='same', use_bias=True, name=f'{name}/conv')
    f2 = BatchNormalization(freeze=freeze_bn, momentum=MOMENTUM, epsilon=EPSILON, name=f'{name}/bn')
    return reduce(lambda f, g: lambda *args, **kwargs: g(f(*args, **kwargs)), (f1, f2))
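# A minimal usage sketch of the composed block returned above (not part of the
# original module): it assumes `layers` refers to tf.keras.layers and that the
# custom BatchNormalization plus the MOMENTUM/EPSILON constants are importable
# from the surrounding code; the input shape below is illustrative only.
import tensorflow as tf

feature_map = tf.keras.Input(shape=(64, 64, 160))  # hypothetical BiFPN-level feature map
block = SeparableConvBlock(num_channels=160, kernel_size=3, strides=1, name='demo_block')
out = block(feature_map)  # applies the separable convolution, then the batch norm layer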
def regularizationSearch():
    trainingData, trainingLabels, \
        validationData, validationLabels, \
        testingData, testingLabels = loadAllData("Datasets/cifar-10-batches-mat/", valsplit=0.10)

    bestLambda = 0.0
    bestValAcc = 0.0
    bestLoss = 0.0

    for lambdaValue in np.arange(0, 0.2, 0.005):
        network = Model()
        network.addLayer(Linear(32*32*3, 50, regularization=lambdaValue, initializer="he"))
        network.addLayer(BatchNormalization(50, trainable=True))
        network.addLayer(Relu())
        network.addLayer(Linear(50, 30, regularization=lambdaValue, initializer="he"))
        network.addLayer(BatchNormalization(30, trainable=True))
        network.addLayer(Relu())
        network.addLayer(Linear(30, 10, regularization=lambdaValue, initializer="he"))
        network.addLayer(Softmax())

        sgd = SGD(lr=0.01, lr_decay=0.95, momentum=0.7, shuffle=True, lr_min=1e-5)
        network.compile(sgd, "cce")

        timestamp = datetime.now().strftime('%Y-%b-%d--%H-%M-%S')
        network.fit(trainingData, trainingLabels, epochs=20,
                    validationData=(validationData, validationLabels), batch_size=64)

        #plotAccuracy(network, "plots/", timestamp)
        #plotLoss(network, "plots/", timestamp)

        print("Lambda:{}".format(lambdaValue))
        loss, acc = network.evaluate(validationData, validationLabels)
        print("Val loss: {} , Val acc: {}".format(loss, acc))
        print("\n\n")

        if acc > bestValAcc:
            bestLambda = lambdaValue
            bestValAcc = acc
            bestLoss = loss

    return bestLambda, bestValAcc, bestLoss
def DepthwiseConvBlock(kernel_size, strides, name, freeze_bn=False):
    f1 = layers.DepthwiseConv2D(kernel_size=kernel_size, strides=strides, padding='same',
                                use_bias=False, name='{}_dconv'.format(name))
    f2 = BatchNormalization(freeze=freeze_bn, name='{}_bn'.format(name))
    f3 = layers.ReLU(name='{}_relu'.format(name))
    return reduce(lambda f, g: lambda *args, **kwargs: g(f(*args, **kwargs)), (f1, f2, f3))
def inner_model(trainable, x):
    layers_list = [
        Reshape([-1, 28, 28, 1]),
        Conv(32),
        BatchNormalization(),
        Relu(),
        MaxPool(),
        Conv(64),
        BatchNormalization(),
        Relu(),
        MaxPool(),
        Reshape([-1, 7 * 7 * 64]),
        FullyConnected(1024),
        Relu(),
        FullyConnected(10),
    ]

    variable_saver = VariableSaver()
    signal = x
    print('shape', signal.get_shape())
    for idx, layer in enumerate(layers_list):
        signal = layer.contribute(signal, idx, trainable, variable_saver.save_variable)
        print('shape', signal.get_shape())
    return signal, variable_saver.var_list
def __init__(self, input_size, hidden_size_list, output_size,
             activation='relu', weight_init_std='relu', weight_decay_lambda=0,
             use_dropout=False, dropout_ratio=0.5, use_batchnorm=False):
    self.input_size = input_size
    self.output_size = output_size
    self.hidden_size_list = hidden_size_list
    self.hidden_layer_num = len(hidden_size_list)
    self.weight_decay_lambda = weight_decay_lambda
    self.use_dropout = use_dropout
    self.use_batchnorm = use_batchnorm
    self.params = {}

    # Initialize the weights
    self.__init_weight(weight_init_std)

    # Build the layers
    activation_layer = {'sigmoid': Sigmoid, 'relu': Relu}
    self.layers = OrderedDict()
    for idx in range(1, self.hidden_layer_num + 1):
        self.layers['Affine' + str(idx)] = Affine(self.params['W' + str(idx)],
                                                  self.params['b' + str(idx)])
        if self.use_batchnorm:
            self.params['gamma' + str(idx)] = np.ones(hidden_size_list[idx - 1])
            self.params['beta' + str(idx)] = np.zeros(hidden_size_list[idx - 1])
            self.layers['BatchNorm' + str(idx)] = BatchNormalization(self.params['gamma' + str(idx)],
                                                                     self.params['beta' + str(idx)])

        self.layers['Activation_function' + str(idx)] = activation_layer[activation]()

        if self.use_dropout:
            self.layers['Dropout' + str(idx)] = Dropout(dropout_ratio)

    idx = self.hidden_layer_num + 1
    self.layers['Affine' + str(idx)] = Affine(self.params['W' + str(idx)],
                                              self.params['b' + str(idx)])

    self.last_layer = SoftmaxWithLoss()
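# For reference, a minimal sketch of the per-feature transform that the
# BatchNormalization(gamma, beta) layers above apply during training (rows are
# samples, columns are features, matching the Affine layers). This illustrates the
# standard formula only; the repository's class is assumed to also track running
# statistics for inference.
import numpy as np

def batchnorm_forward(x, gamma, beta, eps=1e-7):
    mu = x.mean(axis=0)                     # per-feature batch mean
    var = x.var(axis=0)                     # per-feature batch variance
    x_hat = (x - mu) / np.sqrt(var + eps)   # normalize to zero mean, unit variance
    return gamma * x_hat + beta             # learned scale and shift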
def __init__(self, width, depth, num_classes=8, num_anchors=9, freeze_bn=False, **kwargs):
    super(ClassNet, self).__init__(**kwargs)
    self.width = width
    self.depth = depth
    self.num_classes = num_classes
    self.num_anchors = num_anchors

    options = {
        'kernel_size': 3,
        'strides': 1,
        'padding': 'same',
    }
    kernel_initializer = {
        'depthwise_initializer': initializers.VarianceScaling(),
        'pointwise_initializer': initializers.VarianceScaling(),
    }
    options.update(kernel_initializer)

    self.convs = [
        layers.SeparableConv2D(filters=self.width, bias_initializer='zeros',
                               name=f'{self.name}/class-{i}', **options)
        for i in range(self.depth)
    ]
    self.head = layers.SeparableConv2D(filters=self.num_classes * self.num_anchors,
                                       bias_initializer=PriorProbability(probability=0.01),
                                       name=f'{self.name}/class-predict', **options)
    self.bns = [
        [BatchNormalization(freeze=freeze_bn, momentum=MOMENTUM, epsilon=EPSILON,
                            name=f'{self.name}/class-{i}-bn-{j}') for j in range(3, 8)]
        for i in range(self.depth)
    ]
    self.activation = layers.Lambda(lambda x: tf.nn.swish(x))
    self.reshape = layers.Reshape((-1, self.num_classes))
    self.activation_sigmoid = layers.Activation('sigmoid')
    self.level = 0
def __init__(self, num_classes, arch='SSD512', batch_size=12):
    super(SSD, self).__init__()
    self.num_classes = num_classes
    self.model = get_ssd_model
    self.arch = arch
    self.batch_norm = BatchNormalization()
    self.batch_size = batch_size
    # self.extra_layers = create_extra_layers()
    self.conf_head_layers = create_conf_head_layers(num_classes)
    self.loc_head_layers = create_loc_head_layers()

    if arch == 'SSD300':
        # self.extra_layers.pop(-1)
        self.conf_head_layers.pop(-2)
        self.loc_head_layers.pop(-2)
        self.input_shape = [300, 300, 3]
    elif arch == 'SSD512':
        self.input_shape = [512, 512, 3]
def __init_layer(self):
    activation_layer = {'sigmoid': Sigmoid, 'relu': Relu}
    self.layers = OrderedDict()
    for idx in range(1, self.hidden_layer_num + 1):
        self.layers['Affine' + str(idx)] = Affine(self.params['W' + str(idx)],
                                                  self.params['b' + str(idx)])
        if self.use_batchnorm:
            self.params['gamma' + str(idx)] = np.ones(self.hidden_size_list[idx - 1])
            self.params['beta' + str(idx)] = np.zeros(self.hidden_size_list[idx - 1])
            self.layers['BatchNorm' + str(idx)] = BatchNormalization(self.params['gamma' + str(idx)],
                                                                     self.params['beta' + str(idx)])
        self.layers['activation_function' + str(idx)] = activation_layer[self.activation]()
        if self.use_dropout:
            self.layers['Dropout' + str(idx)] = Dropout(self.dropout_ration)

    idx += 1
    self.layers['Affine' + str(idx)] = Affine(self.params['W' + str(idx)],
                                              self.params['b' + str(idx)])
    self.last_layer = SoftmaxWithLoss()
def build_network(hidden_layer_sizes: List[int], batch_normalized: bool,
                  regularization: float) -> Network:
    net = Network()
    layer_sizes = [CIFAR10.input_size] + hidden_layer_sizes + [CIFAR10.output_size]
    for i, (size_in, size_out) in enumerate(zip(layer_sizes[:-1], layer_sizes[1:])):
        net.add_layer(Linear(size_in, size_out, regularization, Xavier(), name='Li' + str(i + 1)))
        if i < len(layer_sizes) - 2:
            if batch_normalized:
                net.add_layer(BatchNormalization(size_out, name='Bn' + str(i + 1)))
            net.add_layer(ReLU(size_out, name='Re' + str(i + 1)))
        else:
            net.add_layer(Softmax(size_out, name='S'))
    return net
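# Brief usage sketch for the builder above (hypothetical values, assuming the
# CIFAR10 helper exposes input_size=3072 and output_size=10): two hidden layers
# with batch normalization before each ReLU, i.e.
# Li1 -> Bn1 -> Re1 -> Li2 -> Bn2 -> Re2 -> Li3 -> S.
net = build_network(hidden_layer_sizes=[50, 30], batch_normalized=True, regularization=0.005)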
def test2layergradientsWBN(samples=1, dimensions=3072):
    print("\n\nTesting 2-layer gradients (WITH BN, NO REG) using a batch size of {}".format(samples))

    trainingData, trainingLabels, encodedTrainingLabels = loadData("Datasets/cifar-10-batches-mat/data_batch_1.mat")
    trainingData = trainingData[0:dimensions, 0:samples]
    trainingLabels = trainingLabels[0:dimensions, 0:samples]
    encodedTrainingLabels = encodedTrainingLabels[0:dimensions, 0:samples]

    network = Model()
    linear = Linear(dimensions, 50, regularization=0.00, initializer="xavier")
    network.addLayer(linear)
    bnlayer = BatchNormalization(50)
    network.addLayer(bnlayer)
    network.addLayer(Relu())

    linear2 = Linear(50, 10, regularization=0.00, initializer="xavier")
    network.addLayer(linear2)
    network.addLayer(Softmax())

    sgd = SGD(lr=0.001, lr_decay=1.0, momentum=0.0, shuffle=True)
    network.compile(sgd, "cce")
    #network.fit(trainingData, encodedTrainingLabels, epochs=200, validationData=None, batch_size=samples)

    network.predict(trainingData, updateInternal=True)
    network.backpropagate(encodedTrainingLabels)

    timestamp = datetime.now().strftime('%Y-%b-%d--%H-%M-%S')

    numerical_gradW1 = compute_grads_w_BN(1e-4, linear.W, trainingData, encodedTrainingLabels, network)
    numerical_gradb1 = compute_grads_w_BN(1e-4, linear.b, trainingData, encodedTrainingLabels, network)
    numerical_gradgamma = compute_grads_w_BN(1e-4, bnlayer.gamma, trainingData, encodedTrainingLabels, network)
    numerical_gradbeta = compute_grads_w_BN(1e-4, bnlayer.beta, trainingData, encodedTrainingLabels, network)
    numerical_gradW2 = compute_grads_w_BN(1e-4, linear2.W, trainingData, encodedTrainingLabels, network)
    numerical_gradb2 = compute_grads_w_BN(1e-4, linear2.b, trainingData, encodedTrainingLabels, network)

    print("W1")
    relative_errorW = grad_difference(linear.gradW, numerical_gradW1)
    print("b1")
    relative_errorb = grad_difference(linear.gradb, numerical_gradb1)
    print("gamma1")
    relative_errorW = grad_difference(bnlayer.gradGamma, numerical_gradgamma)
    print("beta1")
    relative_errorb = grad_difference(bnlayer.gradBeta, numerical_gradbeta)
    print("W2")
    relative_errorW2 = grad_difference(linear2.gradW, numerical_gradW2)
    print("b2")
    relative_errorb2 = grad_difference(linear2.gradb, numerical_gradb2)
    print("\n")
img_rows = 28
img_cols = 28
input_shape = (1, img_rows, img_cols)

(train_x, train_y), (test_x, test_y) = mnist.load_data()
train_x = np.reshape(train_x, (len(train_x), 1, img_rows, img_cols)).astype(skml_config.config.i_type)
train_y = convert_to_one_hot(train_y, num_classes)
test_x = np.reshape(test_x, (len(test_x), 1, img_rows, img_cols)).astype(skml_config.config.i_type)
test_y = convert_to_one_hot(test_y, num_classes)
train_x, valid_x, train_y, valid_y = train_test_split(train_x, train_y)

filters = 64
model = Sequential()
model.add(Convolution(filters, 3, input_shape=input_shape))
model.add(BatchNormalization())
model.add(ReLU())
model.add(MaxPooling(2))
model.add(Convolution(filters, 3))
model.add(BatchNormalization())
model.add(ReLU())
model.add(GlobalAveragePooling())
model.add(Affine(num_classes))
model.compile(SoftmaxCrossEntropy(), Adam())

train_batch_size = 100
valid_batch_size = 1
print("Training started: {}".format(datetime.now().strftime("%Y/%m/%d %H:%M")))
model.fit(train_x, train_y, train_batch_size, 20,
          validation_data=(valid_batch_size, valid_x, valid_y), validation_steps=1)
print("Training finished: {}".format(datetime.now().strftime("%Y/%m/%d %H:%M")))
def main():
    trainingData, trainingLabels, \
        validationData, validationLabels, \
        testingData, testingLabels = loadAllData("Datasets/cifar-10-batches-mat/", valsplit=0.10)

    #Settings 1
    #reg = 0.065
    #lr = 0.002

    #Settings 2
    #reg = 0.0021162
    #lr = 0.061474

    #Settings 3
    #reg = 0.0010781
    #lr = 0.069686

    #Settings 4
    #reg = 0.0049132
    #lr = 0.07112

    #Settings 5
    reg = 0.005
    lr = 0.007

    network = Model()
    network.addLayer(Linear(32 * 32 * 3, 50, regularization=reg, initializer="he"))
    network.addLayer(BatchNormalization(50, trainable=True))
    network.addLayer(Relu())
    network.addLayer(Linear(50, 30, regularization=reg, initializer="he"))
    network.addLayer(BatchNormalization(30, trainable=True))
    network.addLayer(Relu())
    network.addLayer(Linear(30, 10, regularization=reg, initializer="he"))
    network.addLayer(Softmax())

    sgd = SGD(lr=lr, lr_decay=0.95, momentum=0.7, shuffle=True, lr_min=1e-5)
    network.compile(sgd, "cce")

    timestamp = datetime.now().strftime('%Y-%b-%d--%H-%M-%S')
    network.fit(trainingData, trainingLabels, epochs=30, batch_size=100,
                validationData=(validationData, validationLabels))

    plotAccuracy(network, "plots/", timestamp,
                 title="3-layer network accuracy over epochs, eta:{}, lambda:{}".format(lr, reg))
    plotLoss(network, "plots/", timestamp,
             title="3-layer network loss over epochs, eta:{}, lambda:{}".format(lr, reg))

    loss, acc = network.evaluate(testingData, testingLabels)
    print("Test loss: {} , Test acc: {}".format(loss, acc))
def convert(keras_model, class_map, description="Neural Network Model"):
    """
    Convert a keras model to PMML
    @model. The keras model object
    @class_map. A map in the form {class_id: class_name}
    @description. A short description of the model
    Returns a DeepNeuralNetwork object which can be exported to PMML
    """
    pmml = DeepNetwork(description=description, class_map=class_map)
    pmml.keras_model = keras_model
    pmml.model_name = keras_model.name
    config = keras_model.get_config()

    for layer in config['layers']:
        layer_class = layer['class_name']
        layer_config = layer['config']
        layer_inbound_nodes = layer['inbound_nodes']
        # Note: string equality must use ==, not the identity operator `is`.
        # Input
        if layer_class == "InputLayer":
            pmml._append_layer(InputLayer(
                name=layer_config['name'],
                input_size=layer_config['batch_input_shape'][1:]
            ))
        # Conv2D
        elif layer_class == "Conv2D":
            pmml._append_layer(Conv2D(
                name=layer_config['name'],
                channels=layer_config['filters'],
                kernel_size=layer_config['kernel_size'],
                dilation_rate=layer_config['dilation_rate'],
                use_bias=layer_config['use_bias'],
                activation=layer_config['activation'],
                strides=layer_config['strides'],
                padding=layer_config['padding'],
                inbound_nodes=get_inbound_nodes(layer_inbound_nodes),
            ))
        # DepthwiseConv2D
        elif layer_class == "DepthwiseConv2D":
            pmml._append_layer(DepthwiseConv2D(
                name=layer_config['name'],
                kernel_size=layer_config['kernel_size'],
                depth_multiplier=layer_config['depth_multiplier'],
                use_bias=layer_config['use_bias'],
                activation=layer_config['activation'],
                strides=layer_config['strides'],
                padding=layer_config['padding'],
                inbound_nodes=get_inbound_nodes(layer_inbound_nodes),
            ))
        # MaxPooling
        elif layer_class == "MaxPooling2D":
            pmml._append_layer(MaxPooling2D(
                name=layer_config['name'],
                pool_size=layer_config['pool_size'],
                strides=layer_config['strides'],
                inbound_nodes=get_inbound_nodes(layer_inbound_nodes),
            ))
        elif layer_class == "AveragePooling2D":
            pmml._append_layer(AveragePooling2D(
                name=layer_config['name'],
                pool_size=layer_config['pool_size'],
                strides=layer_config['strides'],
                inbound_nodes=get_inbound_nodes(layer_inbound_nodes),
            ))
        elif layer_class == "GlobalAveragePooling2D":
            pmml._append_layer(GlobalAveragePooling2D(
                name=layer_config['name'],
                inbound_nodes=get_inbound_nodes(layer_inbound_nodes),
            ))
        # Flatten
        elif layer_class == "Flatten":
            pmml._append_layer(Flatten(
                name=layer_config['name'],
                inbound_nodes=get_inbound_nodes(layer_inbound_nodes),
            ))
        # Dense
        elif layer_class == "Dense":
            pmml._append_layer(Dense(
                name=layer_config['name'],
                channels=layer_config['units'],
                use_bias=layer_config['use_bias'],
                activation=layer_config['activation'],
                inbound_nodes=get_inbound_nodes(layer_inbound_nodes),
            ))
        # Zero padding layer
        elif layer_class == "ZeroPadding2D":
            pmml._append_layer(ZeroPadding2D(
                name=layer_config['name'],
                padding=layer_config['padding'],
                inbound_nodes=get_inbound_nodes(layer_inbound_nodes),
            ))
        # Reshape layer
        elif layer_class == "Reshape":
            pmml._append_layer(Reshape(
                name=layer_config['name'],
                target_shape=layer_config['target_shape'],
                inbound_nodes=get_inbound_nodes(layer_inbound_nodes),
            ))
        elif layer_class == "Dropout":
            pmml._append_layer(Dropout(
                name=layer_config['name'],
                inbound_nodes=get_inbound_nodes(layer_inbound_nodes),
            ))
        # Batch Normalization
        elif layer_class == "BatchNormalization":
            pmml._append_layer(BatchNormalization(
                name=layer_config['name'],
                axis=layer_config['axis'],
                momentum=layer_config['momentum'],
                epsilon=layer_config['epsilon'],
                center=layer_config['center'],
                inbound_nodes=get_inbound_nodes(layer_inbound_nodes),
            ))
        elif layer_class == "Add":
            pmml._append_layer(Merge(
                name=layer_config['name'],
                inbound_nodes=get_inbound_nodes(layer_inbound_nodes)
            ))
        elif layer_class == "Subtract":
            pmml._append_layer(Merge(
                name=layer_config['name'],
                operator='subtract',
                inbound_nodes=get_inbound_nodes(layer_inbound_nodes)
            ))
        elif layer_class == "Dot":
            pmml._append_layer(Merge(
                name=layer_config['name'],
                operator='dot',
                inbound_nodes=get_inbound_nodes(layer_inbound_nodes)
            ))
        elif layer_class == "Concatenate":
            pmml._append_layer(Merge(
                name=layer_config['name'],
                axis=layer_config['axis'],
                operator='concatenate',
                inbound_nodes=get_inbound_nodes(layer_inbound_nodes)
            ))
        elif layer_class == "Activation":
            pmml._append_layer(Activation(
                name=layer_config['name'],
                activation=layer_config['activation'],
                inbound_nodes=get_inbound_nodes(layer_inbound_nodes),
            ))
        elif layer_class == "ReLU":
            pmml._append_layer(Activation(
                name=layer_config['name'],
                activation='relu',
                threshold=layer_config['threshold'],
                max_value=layer_config['max_value'],
                negative_slope=layer_config['negative_slope'],
                inbound_nodes=get_inbound_nodes(layer_inbound_nodes),
            ))
        # Unknown layer
        else:
            raise ValueError("Unknown layer type:", layer_class)
    return pmml
def EfficientNet(width_coefficient,
                 depth_coefficient,
                 default_resolution,
                 dropout_rate=0.2,
                 drop_connect_rate=0.2,
                 depth_divisor=8,
                 blocks_args=DEFAULT_BLOCKS_ARGS,
                 model_name='efficientnet',
                 include_top=True,
                 weights='imagenet',
                 input_tensor=None,
                 input_shape=None,
                 pooling=None,
                 classes=1000,
                 freeze_bn=False,
                 **kwargs):
    """Instantiates the EfficientNet architecture using given scaling coefficients.

    Optionally loads weights pre-trained on ImageNet.
    Note that the data format convention used by the model is
    the one specified in your Keras config at `~/.keras/keras.json`.

    # Arguments
        width_coefficient: float, scaling coefficient for network width.
        depth_coefficient: float, scaling coefficient for network depth.
        default_resolution: int, default input image size.
        dropout_rate: float, dropout rate before final classifier layer.
        drop_connect_rate: float, dropout rate at skip connections.
        depth_divisor: int.
        blocks_args: A list of BlockArgs to construct block modules.
        model_name: string, model name.
        include_top: whether to include the fully-connected layer at the top of the network.
        weights: one of `None` (random initialization), 'imagenet' (pre-training on ImageNet),
            or the path to the weights file to be loaded.
        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
            to use as image input for the model.
        input_shape: optional shape tuple, only to be specified if `include_top` is False.
            It should have exactly 3 inputs channels.
        pooling: optional pooling mode for feature extraction when `include_top` is `False`.
            - `None` means that the output of the model will be the 4D tensor output
              of the last convolutional layer.
            - `avg` means that global average pooling will be applied to the output of the
              last convolutional layer, and thus the output of the model will be a 2D tensor.
            - `max` means that global max pooling will be applied.
        classes: optional number of classes to classify images into, only to be specified
            if `include_top` is True, and if no `weights` argument is specified.

    # Returns
        A Keras model instance.

    # Raises
        ValueError: in case of invalid argument for `weights`, or invalid input shape.
    """
    global backend, layers, models, keras_utils
    backend, layers, models, keras_utils = get_submodules_from_kwargs(kwargs)
    features = []

    if not (weights in {'imagenet', None} or os.path.exists(weights)):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `imagenet` '
                         '(pre-training on ImageNet), '
                         'or the path to the weights file to be loaded.')

    if weights == 'imagenet' and include_top and classes != 1000:
        raise ValueError('If using `weights` as `"imagenet"` with `include_top`'
                         ' as true, `classes` should be 1000')

    # Determine proper input shape
    input_shape = _obtain_input_shape((224, 224, 3),
                                      default_size=default_resolution,
                                      min_size=32,
                                      data_format=backend.image_data_format(),
                                      require_flatten=include_top,
                                      weights=weights)
    # input_shape = _obtain_input_shape(input_shape,
    #                                   default_size=default_resolution,
    #                                   min_size=32,
    #                                   data_format=backend.image_data_format(),
    #                                   require_flatten=include_top,
    #                                   weights=weights)

    if input_tensor is None:
        img_input = layers.Input(shape=input_shape)
    else:
        if backend.backend() == 'tensorflow':
            from tensorflow.python.keras.backend import is_keras_tensor
        else:
            is_keras_tensor = backend.is_keras_tensor
        if not is_keras_tensor(input_tensor):
            img_input = layers.Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1
    activation = get_swish(**kwargs)

    # Build stem
    x = img_input
    x = layers.Conv2D(round_filters(32, width_coefficient, depth_divisor), 3,
                      strides=(2, 2),
                      padding='same',
                      use_bias=False,
                      kernel_initializer=CONV_KERNEL_INITIALIZER,
                      name='stem_conv')(x)
    x = BatchNormalization(freeze=freeze_bn, axis=bn_axis, name='stem_bn')(x)
    x = layers.Activation(activation, name='stem_activation')(x)

    # Build blocks
    num_blocks_total = sum(block_args.num_repeat for block_args in blocks_args)
    block_num = 0
    for idx, block_args in enumerate(blocks_args):
        assert block_args.num_repeat > 0
        # Update block input and output filters based on depth multiplier.
        block_args = block_args._replace(
            input_filters=round_filters(block_args.input_filters, width_coefficient, depth_divisor),
            output_filters=round_filters(block_args.output_filters, width_coefficient, depth_divisor),
            num_repeat=round_repeats(block_args.num_repeat, depth_coefficient))

        # The first block needs to take care of stride and filter size increase.
        drop_rate = drop_connect_rate * float(block_num) / num_blocks_total
        x = mb_conv_block(x, block_args,
                          activation=activation,
                          drop_rate=drop_rate,
                          prefix='block{}a_'.format(idx + 1),
                          freeze_bn=freeze_bn)
        block_num += 1
        if block_args.num_repeat > 1:
            # pylint: disable=protected-access
            block_args = block_args._replace(input_filters=block_args.output_filters, strides=[1, 1])
            # pylint: enable=protected-access
            for bidx in xrange(block_args.num_repeat - 1):
                drop_rate = drop_connect_rate * float(block_num) / num_blocks_total
                block_prefix = 'block{}{}_'.format(idx + 1, string.ascii_lowercase[bidx + 1])
                x = mb_conv_block(x, block_args,
                                  activation=activation,
                                  drop_rate=drop_rate,
                                  prefix=block_prefix,
                                  freeze_bn=freeze_bn)
                block_num += 1
        if idx < len(blocks_args) - 1 and blocks_args[idx + 1].strides[0] == 2:
            features.append(x)
        elif idx == len(blocks_args) - 1:
            features.append(x)
    return features
def mb_conv_block(inputs, block_args, activation, drop_rate=None, prefix='', freeze_bn=False):
    """Mobile Inverted Residual Bottleneck."""

    has_se = (block_args.se_ratio is not None) and (0 < block_args.se_ratio <= 1)
    bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1

    # workaround over non working dropout with None in noise_shape in tf.keras
    Dropout = get_dropout(
        backend=backend,
        layers=layers,
        models=models,
        utils=keras_utils
    )

    # Expansion phase
    filters = block_args.input_filters * block_args.expand_ratio
    if block_args.expand_ratio != 1:
        x = layers.Conv2D(filters, 1,
                          padding='same',
                          use_bias=False,
                          kernel_initializer=CONV_KERNEL_INITIALIZER,
                          name=prefix + 'expand_conv')(inputs)
        x = BatchNormalization(freeze=freeze_bn, axis=bn_axis, name=prefix + 'expand_bn')(x)
        x = layers.Activation(activation, name=prefix + 'expand_activation')(x)
    else:
        x = inputs

    # Depthwise Convolution
    x = layers.DepthwiseConv2D(block_args.kernel_size,
                               strides=block_args.strides,
                               padding='same',
                               use_bias=False,
                               depthwise_initializer=CONV_KERNEL_INITIALIZER,
                               name=prefix + 'dwconv')(x)
    x = BatchNormalization(freeze=freeze_bn, axis=bn_axis, name=prefix + 'bn')(x)
    x = layers.Activation(activation, name=prefix + 'activation')(x)

    # Squeeze and Excitation phase
    if has_se:
        num_reduced_filters = max(1, int(block_args.input_filters * block_args.se_ratio))
        se_tensor = layers.GlobalAveragePooling2D(name=prefix + 'se_squeeze')(x)

        target_shape = (1, 1, filters) if backend.image_data_format() == 'channels_last' else (filters, 1, 1)
        se_tensor = layers.Reshape(target_shape, name=prefix + 'se_reshape')(se_tensor)
        se_tensor = layers.Conv2D(num_reduced_filters, 1,
                                  activation=activation,
                                  padding='same',
                                  use_bias=True,
                                  kernel_initializer=CONV_KERNEL_INITIALIZER,
                                  name=prefix + 'se_reduce')(se_tensor)
        se_tensor = layers.Conv2D(filters, 1,
                                  activation='sigmoid',
                                  padding='same',
                                  use_bias=True,
                                  kernel_initializer=CONV_KERNEL_INITIALIZER,
                                  name=prefix + 'se_expand')(se_tensor)
        if backend.backend() == 'theano':
            # For the Theano backend, we have to explicitly make
            # the excitation weights broadcastable.
            pattern = ([True, True, True, False] if backend.image_data_format() == 'channels_last'
                       else [True, False, True, True])
            se_tensor = layers.Lambda(
                lambda x: backend.pattern_broadcast(x, pattern),
                name=prefix + 'se_broadcast')(se_tensor)
        x = layers.multiply([x, se_tensor], name=prefix + 'se_excite')

    # Output phase
    x = layers.Conv2D(block_args.output_filters, 1,
                      padding='same',
                      use_bias=False,
                      kernel_initializer=CONV_KERNEL_INITIALIZER,
                      name=prefix + 'project_conv')(x)
    x = BatchNormalization(freeze=freeze_bn, axis=bn_axis, name=prefix + 'project_bn')(x)
    if block_args.id_skip and all(
            s == 1 for s in block_args.strides
    ) and block_args.input_filters == block_args.output_filters:
        if drop_rate and (drop_rate > 0):
            x = Dropout(drop_rate,
                        noise_shape=(None, 1, 1, 1),
                        name=prefix + 'drop')(x)
        x = layers.add([x, inputs], name=prefix + 'add')

    return x
def paramSearch(method="range"):
    trainingData, trainingLabels, \
        validationData, validationLabels, \
        testingData, testingLabels = loadAllData("Datasets/cifar-10-batches-mat/", valsplit=0.20)

    bestLambda = 0.0
    bestLR = 0.0
    bestValAcc = 0.0
    bestLoss = 0.0
    bestModel = None
    data = [[], [], []]

    if method == "range":
        lambdaValues = np.arange(0, 0.05, 0.001)
        lrValues = np.arange(0.04, 0.08, 0.005)
    elif method == "sampling":
        lrValues = np.random.uniform(0.06, 0.07, 15)
        lambdaValues = np.random.uniform(0.001, 0.005, 15)

    data.append((lrValues.shape[0], lambdaValues.shape[0]))  # Append axis dimensions for 3D plotting

    for lambdaValue in lambdaValues:
        for lr in lrValues:
            print("Lambda:{}".format(lambdaValue))
            print("LR:{}".format(lr))

            network = Model()
            network.addLayer(Linear(32*32*3, 50, regularization=lambdaValue, initializer="he"))
            network.addLayer(BatchNormalization(50, trainable=True))
            network.addLayer(Relu())
            network.addLayer(Linear(50, 30, regularization=lambdaValue, initializer="he"))
            network.addLayer(BatchNormalization(30, trainable=True))
            network.addLayer(Relu())
            network.addLayer(Linear(30, 10, regularization=lambdaValue, initializer="he"))
            network.addLayer(Softmax())

            sgd = SGD(lr=lr, lr_decay=0.95, momentum=0.7, shuffle=True, lr_min=1e-5)
            network.compile(sgd, "cce")

            timestamp = datetime.now().strftime('%Y-%b-%d--%H-%M-%S')
            network.fit(trainingData, trainingLabels, epochs=20,
                        validationData=(validationData, validationLabels),
                        batch_size=100, verbose=False)

            #plotAccuracy(network, "plots/", timestamp)
            #plotLoss(network, "plots/", timestamp)

            loss, acc = network.evaluate(validationData, validationLabels)
            print("Val loss: {} , Val acc: {}".format(loss, acc))
            print("\n\n")

            data[0].append(lr)
            data[1].append(lambdaValue)
            data[2].append(acc)

            if acc > bestValAcc:
                bestLambda = lambdaValue
                bestLR = lr
                bestValAcc = acc
                bestLoss = loss
                bestModel = network

    loss, acc = bestModel.evaluate(testingData, testingLabels)
    print("Test loss: {} , Test acc: {}".format(loss, acc))
    print("\n\n")

    return bestLambda, bestLR, bestValAcc, bestLoss, data
def __init__(self, width, depth, num_iteration_steps, num_anchors=9, freeze_bn=False,
             use_group_norm=True, num_groups_gn=None, **kwargs):
    super(IterativeTranslationSubNet, self).__init__(**kwargs)
    self.width = width
    self.depth = depth
    self.num_anchors = num_anchors
    self.num_iteration_steps = num_iteration_steps
    self.use_group_norm = use_group_norm
    self.num_groups_gn = num_groups_gn

    if backend.image_data_format() == 'channels_first':
        gn_channel_axis = 1
    else:
        gn_channel_axis = -1

    options = {
        'kernel_size': 3,
        'strides': 1,
        'padding': 'same',
        'bias_initializer': 'zeros',
    }
    kernel_initializer = {
        'depthwise_initializer': initializers.VarianceScaling(),
        'pointwise_initializer': initializers.VarianceScaling(),
    }
    options.update(kernel_initializer)

    self.convs = [
        layers.SeparableConv2D(filters=self.width,
                               name=f'{self.name}/iterative-translation-sub-{i}',
                               **options)
        for i in range(self.depth)
    ]
    self.head_xy = layers.SeparableConv2D(filters=self.num_anchors * 2,
                                          name=f'{self.name}/iterative-translation-xy-sub-predict',
                                          **options)
    self.head_z = layers.SeparableConv2D(filters=self.num_anchors,
                                         name=f'{self.name}/iterative-translation-z-sub-predict',
                                         **options)

    if self.use_group_norm:
        self.norm_layer = [[[GroupNormalization(groups=self.num_groups_gn,
                                                axis=gn_channel_axis,
                                                name=f'{self.name}/iterative-translation-sub-{k}-{i}-gn-{j}')
                             for j in range(3, 8)]
                            for i in range(self.depth)]
                           for k in range(self.num_iteration_steps)]
    else:
        self.norm_layer = [[[BatchNormalization(freeze=freeze_bn,
                                                momentum=MOMENTUM,
                                                epsilon=EPSILON,
                                                name=f'{self.name}/iterative-translation-sub-{k}-{i}-bn-{j}')
                             for j in range(3, 8)]
                            for i in range(self.depth)]
                           for k in range(self.num_iteration_steps)]

    self.activation = layers.Lambda(lambda x: tf.nn.swish(x))
y_tr = y_tr.reshape((-1, 1))
y_te = y_te.reshape((-1, 1))

model = Model(verbose=True)
batch_size = 1024
n_classes = 10
std = 0.01
reg = 0.0

model.add_layer(
    Convolution(32, (3, 3),
                input_shape=(batch_size, X_tr.shape[1], X_tr.shape[2], X_tr.shape[3]),
                weight_initializer=NormalInitializer(std)))
model.add_layer(ReLuActivation())
model.add_layer(BatchNormalization())
model.add_layer(
    Convolution(32, (3, 3), weight_initializer=NormalInitializer(std), padding='same'))
model.add_layer(ReLuActivation())
model.add_layer(MaxPool((2, 2)))
model.add_layer(Flatten())
model.add_layer(
    Affine(100, weight_initializer=NormalInitializer(std), reg=reg))
model.add_layer(ReLuActivation())
model.add_layer(DropoutLayer(drop_rate=0.3))
model.add_layer(
    Affine(n_classes, weight_initializer=NormalInitializer(std), reg=reg))
def bn_test():
    samples = 200
    dimensions = 100

    print("Performing BN Tests")
    trainingData, _, _ = loadData("Datasets/cifar-10-batches-mat/data_batch_1.mat")
    testingData, _, _ = loadData("Datasets/cifar-10-batches-mat/data_batch_2.mat")
    validationData, _, _ = loadData("Datasets/cifar-10-batches-mat/data_batch_3.mat")

    trainingData = trainingData[0:dimensions, 0:samples]
    validationData = validationData[0:dimensions, 0:samples]
    testingData = testingData[0:dimensions, :]

    ### MEAN AND VAR TEST ###
    gamma = np.ones((dimensions, 1), dtype=float)
    beta = np.zeros((dimensions, 1), dtype=float)

    print("Mean and var before")
    print(np.mean(trainingData, axis=1))
    print(np.std(trainingData, axis=1))

    bn = BatchNormalization(100, gamma=gamma, beta=beta, trainable=True)
    data = bn.forward(trainingData, True)

    print("Mean and std after")
    print(np.mean(data, axis=1))
    print(np.std(data, axis=1))
    ########################

    ###### GAMMA AND BETA TEST #####
    #gamma = np.ones((dimensions, 1 ), dtype=float) + 5
    #beta = np.zeros((dimensions, 1 ), dtype=float) + 1
    gamma = np.array([i for i in range(0, 100)]).reshape((dimensions, 1))
    beta = np.array([i for i in range(0, 100)]).reshape((dimensions, 1))

    print("Mean and std before")
    print(np.mean(trainingData, axis=1))
    print(np.std(trainingData, axis=1))

    bn = BatchNormalization(100, gamma=gamma, beta=beta, trainable=True)
    data = bn.forward(trainingData, True)

    print("Mean and std after")
    print(np.mean(data, axis=1))
    print(np.std(data, axis=1))
    #########################

    # TESTING TRAIN VS TEST NUMBERS (STD FOR TEST IS VERY HIGH)
    gamma = np.ones((dimensions, 1), dtype=float)
    beta = np.zeros((dimensions, 1), dtype=float)

    bn = BatchNormalization(100, gamma=gamma, beta=beta, trainable=True, alpha=0.90)
    for i in range(0, 500):
        batch = np.random.randn(100, 8)
        #batch = testingData[:, np.random.choice(testingData.shape[1], 8)]
        data = bn.forward(batch, True)

    data = bn.forward(np.random.randn(100, 8), False)
    print("Mean and std after")
    print(np.mean(data, axis=1))
    print(np.std(data, axis=1))
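# For context, a minimal sketch (an assumption about the layer's internals, not
# code from this repository) of the inference-time path that the alpha=0.90 loop
# above exercises: at test time the layer is expected to normalize with the
# exponentially averaged statistics gathered during training rather than with the
# current batch's mean and variance.
import numpy as np

def bn_inference(x, gamma, beta, moving_mean, moving_var, eps=1e-7):
    # x has shape (features, samples); gamma, beta and the moving statistics have
    # shape (features, 1), matching the (dimensions, 1) parameters used in bn_test.
    return gamma * (x - moving_mean) / np.sqrt(moving_var + eps) + beta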
def prepare_feature_maps_for_BiFPN(C3, C4, C5, num_channels, freeze_bn):
    """
    Prepares the backbone feature maps for the first BiFPN layer

    Args:
        C3, C4, C5: The EfficientNet backbone feature maps of the different levels
        num_channels: Number of channels used in the BiFPN
        freeze_bn: Boolean indicating if the batch norm layers should be frozen during training or not.

    Returns:
        The prepared input feature maps for the first BiFPN layer
    """
    P3_in = C3
    P3_in = layers.Conv2D(num_channels, kernel_size=1, padding='same',
                          name='fpn_cells/cell_0/fnode3/resample_0_0_8/conv2d')(P3_in)
    P3_in = BatchNormalization(freeze=freeze_bn, momentum=MOMENTUM, epsilon=EPSILON,
                               name='fpn_cells/cell_0/fnode3/resample_0_0_8/bn')(P3_in)

    P4_in = C4
    P4_in_1 = layers.Conv2D(num_channels, kernel_size=1, padding='same',
                            name='fpn_cells/cell_0/fnode2/resample_0_1_7/conv2d')(P4_in)
    P4_in_1 = BatchNormalization(freeze=freeze_bn, momentum=MOMENTUM, epsilon=EPSILON,
                                 name='fpn_cells/cell_0/fnode2/resample_0_1_7/bn')(P4_in_1)
    P4_in_2 = layers.Conv2D(num_channels, kernel_size=1, padding='same',
                            name='fpn_cells/cell_0/fnode4/resample_0_1_9/conv2d')(P4_in)
    P4_in_2 = BatchNormalization(freeze=freeze_bn, momentum=MOMENTUM, epsilon=EPSILON,
                                 name='fpn_cells/cell_0/fnode4/resample_0_1_9/bn')(P4_in_2)

    P5_in = C5
    P5_in_1 = layers.Conv2D(num_channels, kernel_size=1, padding='same',
                            name='fpn_cells/cell_0/fnode1/resample_0_2_6/conv2d')(P5_in)
    P5_in_1 = BatchNormalization(freeze=freeze_bn, momentum=MOMENTUM, epsilon=EPSILON,
                                 name='fpn_cells/cell_0/fnode1/resample_0_2_6/bn')(P5_in_1)
    P5_in_2 = layers.Conv2D(num_channels, kernel_size=1, padding='same',
                            name='fpn_cells/cell_0/fnode5/resample_0_2_10/conv2d')(P5_in)
    P5_in_2 = BatchNormalization(freeze=freeze_bn, momentum=MOMENTUM, epsilon=EPSILON,
                                 name='fpn_cells/cell_0/fnode5/resample_0_2_10/bn')(P5_in_2)

    P6_in = layers.Conv2D(num_channels, kernel_size=1, padding='same', name='resample_p6/conv2d')(C5)
    P6_in = BatchNormalization(freeze=freeze_bn, momentum=MOMENTUM, epsilon=EPSILON,
                               name='resample_p6/bn')(P6_in)
    P6_in = layers.MaxPooling2D(pool_size=3, strides=2, padding='same', name='resample_p6/maxpool')(P6_in)

    P7_in = layers.MaxPooling2D(pool_size=3, strides=2, padding='same', name='resample_p7/maxpool')(P6_in)

    return P3_in, P4_in_1, P4_in_2, P5_in_1, P5_in_2, P6_in, P7_in
def main():
    test_id = 1
    if test_id == 1:
        #----------
        # Conv Net
        #----------
        optimizer = Adam()

        data = datasets.load_digits()
        X = data.data  # (1797, 64)
        y = data.target

        # Convert to one-hot encoding
        y = to_categorical(y.astype("int"))  # (n_sample, n_class)

        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, seed=1)

        # Reshape X to (n_samples, channels, height, width)
        X_train = X_train.reshape((-1, 1, 8, 8))
        X_test = X_test.reshape((-1, 1, 8, 8))

        clf = NeuralNetwork(optimizer=optimizer,
                            loss=CrossEntropy,
                            validation_data=(X_test, y_test))

        clf.add(Conv2D(n_filters=16, filter_shape=(3, 3), stride=1, input_shape=(1, 8, 8), padding='same'))
        clf.add(Activation('relu'))
        clf.add(Dropout(0.25))
        clf.add(BatchNormalization())
        clf.add(Conv2D(n_filters=32, filter_shape=(3, 3), stride=1, padding='same'))
        clf.add(Activation('relu'))
        clf.add(Dropout(0.25))
        clf.add(BatchNormalization())
        clf.add(Flatten())            # Flatten layer
        clf.add(Dense(256))           # Fully connected layer
        clf.add(Activation('relu'))
        clf.add(Dropout(0.4))
        clf.add(BatchNormalization())
        clf.add(Dense(10))
        clf.add(Activation('softmax'))

        print()
        clf.summary(name="ConvNet")

        train_err, val_err = clf.fit(X_train, y_train, n_epochs=50, batch_size=64)

        # Training and validation error plot
        n = len(train_err)
        training, = plt.plot(range(n), train_err, label="Training Error")
        validation, = plt.plot(range(n), val_err, label="Validation Error")
        plt.legend(handles=[training, validation])
        plt.title("Error Plot")
        plt.ylabel('Error')
        plt.xlabel('Iterations')
        plt.show()

        _, accuracy = clf.test_on_batch(X_test, y_test)
        print("Accuracy:", accuracy)

        y_pred = np.argmax(clf.predict(X_test), axis=1)
        X_test = X_test.reshape(-1, 8 * 8)
        # Reduce dimension to 2D using PCA and plot the results
        Plot().plot_in_2d(X_test, y_pred,
                          title="Convolutional Neural Network",
                          accuracy=accuracy,
                          legend_labels=range(10))

    if test_id == 2:
        dataset = MultiClassDataset(n_samples=300, centers=3, n_features=2,
                                    center_box=(-10.0, 10.0), cluster_std=1.0,
                                    norm=True, one_hot=True)
        X = dataset.datas
        y = dataset.labels
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, seed=1)

        clf = NeuralNetwork(optimizer=optimizer,
                            loss=CrossEntropy,
                            validation_data=(X_test, y_test))
        clf.add(Dense(3))
        clf.add(Activation('softmax'))
        clf.summary(name="SoftmaxReg")

        train_err, val_err = clf.fit(X_train, y_train, n_epochs=50, batch_size=256)
def __init__(self, width, depth, num_values, num_iteration_steps, num_anchors=9, freeze_bn=False,
             use_group_norm=True, num_groups_gn=None, **kwargs):
    super(RotationNet, self).__init__(**kwargs)
    self.width = width
    self.depth = depth
    self.num_anchors = num_anchors
    self.num_values = num_values
    self.num_iteration_steps = num_iteration_steps
    self.use_group_norm = use_group_norm
    self.num_groups_gn = num_groups_gn

    if backend.image_data_format() == 'channels_first':
        channel_axis = 0
        gn_channel_axis = 1
    else:
        channel_axis = -1
        gn_channel_axis = -1

    options = {
        'kernel_size': 3,
        'strides': 1,
        'padding': 'same',
        'bias_initializer': 'zeros',
    }
    kernel_initializer = {
        'depthwise_initializer': initializers.VarianceScaling(),
        'pointwise_initializer': initializers.VarianceScaling(),
    }
    options.update(kernel_initializer)

    self.convs = [
        layers.SeparableConv2D(filters=self.width, name=f'{self.name}/rotation-{i}', **options)
        for i in range(self.depth)
    ]
    self.initial_rotation = layers.SeparableConv2D(filters=self.num_anchors * self.num_values,
                                                   name=f'{self.name}/rotation-init-predict',
                                                   **options)

    if self.use_group_norm:
        self.norm_layer = [[GroupNormalization(groups=self.num_groups_gn,
                                               axis=gn_channel_axis,
                                               name=f'{self.name}/rotation-{i}-gn-{j}')
                            for j in range(3, 8)]
                           for i in range(self.depth)]
    else:
        self.norm_layer = [[BatchNormalization(freeze=freeze_bn,
                                               momentum=MOMENTUM,
                                               epsilon=EPSILON,
                                               name=f'{self.name}/rotation-{i}-bn-{j}')
                            for j in range(3, 8)]
                           for i in range(self.depth)]

    self.iterative_submodel = IterativeRotationSubNet(width=self.width,
                                                      depth=self.depth - 1,
                                                      num_values=self.num_values,
                                                      num_iteration_steps=self.num_iteration_steps,
                                                      num_anchors=self.num_anchors,
                                                      freeze_bn=freeze_bn,
                                                      use_group_norm=self.use_group_norm,
                                                      num_groups_gn=self.num_groups_gn,
                                                      name="iterative_rotation_subnet")

    self.activation = layers.Lambda(lambda x: tf.nn.swish(x))
    self.reshape = layers.Reshape((-1, num_values))
    self.level = 0
    self.add = layers.Add()
    self.concat = layers.Concatenate(axis=channel_axis)