def __init__(self):
    """Build the layers of an image-transformation (style-transfer style) net.

    Architecture: 3 downsampling convs -> 5 residual blocks ->
    2 transposed (upsampling) convs -> output conv -> tanh.
    """
    # Downsampling path: a wide 9x9 stem, then two stride-2 convs that
    # halve the spatial dimensions twice (32 -> 64 -> 128 channels).
    self.conv1 = ConvLayer(32, (9, 9), strides=(1, 1), padding='same', name="conv_1")
    self.conv2 = ConvLayer(64, (3, 3), strides=(2, 2), padding='same', name="conv_2")
    self.conv3 = ConvLayer(128, (3, 3), strides=(2, 2), padding='same', name="conv_3")
    # Five residual blocks at the bottleneck resolution (128 channels).
    self.res1 = ResBlock(128, prefix="res_1")
    self.res2 = ResBlock(128, prefix="res_2")
    self.res3 = ResBlock(128, prefix="res_3")
    self.res4 = ResBlock(128, prefix="res_4")
    self.res5 = ResBlock(128, prefix="res_5")
    # Upsampling path: stride-2 transposed convs restore the input resolution.
    self.convt1 = ConvTLayer(64, (3, 3), strides=(2, 2), padding='same', name="conv_t_1")
    self.convt2 = ConvTLayer(32, (3, 3), strides=(2, 2), padding='same', name="conv_t_2")
    # Output conv maps back to 3 channels; activation disabled here because
    # the tanh below is applied instead.
    self.conv4 = ConvLayer(3, (9, 9), strides=(1, 1), padding='same', activate=False, name="conv_4")
    self.tanh = Activation('tanh')
    # Assemble the full model from the layers declared above.
    self.model = self._get_model()
class cnn(object):
    """Namespace wrapper around the simple CNN training helper."""

    class simple_cnn_model(object):
        """Train and evaluate a small CNN on data from the cifar100 loader."""

        def __init__(self, epochs, batch_size, lr):
            self.epochs = epochs
            self.batch_size = batch_size
            self.lr = lr

        def load_data(self):
            """Load data from the cifar100 folder (fixed seed for reproducibility)."""
            (x_train, y_train), (x_test, y_test) = cifar100(1211506319)
            return x_train, y_train, x_test, y_test

        def train_model(self, layers, loss_metrics, x_train, y_train):
            """Build a Sequential model from `layers` and fit it.

            :return: per-epoch average training loss (1-D array of length epochs)
            """
            # build model
            self.model = Sequential(layers, loss_metrics)
            # train the model
            loss = self.model.fit(x_train, y_train, self.epochs, self.lr,
                                  self.batch_size, print_output=True)
            # Collapse the per-batch losses into one mean loss per epoch.
            avg_loss = np.mean(np.reshape(loss, (self.epochs, -1)), axis=1)
            return avg_loss

        def test_model(self, x_test, y_test):
            """Return classification accuracy of the trained model on the test set."""
            pred_result = self.model.predict(x_test)
            accuracy = np.mean(pred_result == y_test)
            return accuracy


if __name__ == '__main__':
    # define model parameters
    epochs = 15
    batch_size = 128
    lr = [.1]
    # define layers
    layers = (ConvLayer(3, 16, 3), ReluLayer(), MaxPoolLayer(),
              ConvLayer(16, 32, 3), ReluLayer(), MaxPoolLayer(),
              FlattenLayer(), FullLayer(2048, 4), SoftMaxLayer())
    loss_metrics = CrossEntropyLayer()
    # build and train model
    # BUG FIX: simple_cnn_model is nested inside `cnn`, so at module level it
    # must be referenced through the enclosing class; the bare name raised
    # NameError.
    model = cnn.simple_cnn_model(epochs, batch_size, lr)
    x_train, y_train, x_test, y_test = model.load_data()
    loss = model.train_model(layers, loss_metrics, x_train, y_train)
    accuracy = model.test_model(x_test, y_test)
    print("loss: %s" % loss)
    print("The accuracy of the model is %s" % accuracy)
def __init__(self, width_stages, n_cell_stages, stride_stages, dropout=0):
    """Mobile inverted-residual NAS network (weight-sharing supernet).

    :param width_stages: output channel count for each stage
    :param n_cell_stages: number of cells (blocks) in each stage
    :param stride_stages: stride of the first cell of each stage
    :param dropout: dropout rate passed to the final classifier
    """
    super(NASNet, self).__init__()
    self.width_stages = width_stages
    self.n_cell_stages = n_cell_stages
    self.stride_stages = stride_stages
    in_channels = 32
    first_cell_width = 16
    # first conv layer: 3x3 stride-2 stem with relu6 activation
    self.first_conv = ConvLayer(3, in_channels, 3, 2, 1, 1, False, False, True, 'relu6', 0, 'weight_bn_act')
    # first block: expand-ratio-1 inverted residual, no shortcut
    first_block_config = {
        "name": "MobileInvertedResidualBlock",
        "mobile_inverted_conv": {
            "name": "MBInvertedConvLayer",
            "in_channels": in_channels,
            "out_channels": first_cell_width,
            "kernel_size": 3,
            "stride": 1,
            "expand_ratio": 1
        },
        "shortcut": None
    }
    self.first_block = MobileInvertedResidualBlock.build_from_config(
        first_block_config)
    in_channels = first_cell_width
    # blocks: each stage contributes n_cell cells; only the first cell of a
    # stage uses that stage's stride, the rest use stride 1.
    self.blocks = nn.ModuleList()
    for width, n_cell, s in zip(self.width_stages, self.n_cell_stages, self.stride_stages):
        for i in range(n_cell):
            if i == 0:
                stride = s
            else:
                stride = 1
            block = WSMobileInvertedResidualBlock(in_channels, width, stride)
            in_channels = width
            self.blocks.append(block)
    # 1x1 feature-mix conv expands to 1280 channels before pooling.
    self.feature_mix_layer = ConvLayer(in_channels, 1280, 1, 1, 1, 1, False, False, True, 'relu6', 0, 'weight_bn_act')
    self.global_avg_pooling = nn.AdaptiveAvgPool2d(1)
    # 1000-way classifier head with optional dropout.
    self.classifier = LinearLayer(1280, 1000, True, False, None, dropout, 'weight_bn_act')
def mnist(output_size=10):
    """Construct a PathNet and its TaskContainer for the MNIST task.

    :param output_size: number of output classes for the task
    :return: (pathnet, task) tuple ready for training
    """
    conv_config = [{
        'channels': 1,
        'kernel': (3, 3),
        'stride': (1, 1),
        'activation': 'relu'
    }]
    dense_config = [{'out': 20, 'activation': 'relu'}]
    input_shape = [28, 28, 1]
    depth = 3
    width = 10
    max_modules_pr_layer = 3
    min_modules_pr_layer = 1
    learning_rate = 0.0001
    optimizer_type = Adam
    loss = 'categorical_crossentropy'
    flatten_in_unique = True

    # Three convolutional layers; the final one max-pools its output.
    layers = [
        ConvLayer(width, 'L0', conv_config),
        ConvLayer(width, 'L1', conv_config),
        ConvLayer(width, 'L2', conv_config, maxpool=True),
    ]
    Layer.initialize_whole_network(layers, input_shape)

    task = TaskContainer(input_shape, output_size, flatten_in_unique,
                         name='unique_mnist', optimizer=optimizer_type,
                         loss=loss, lr=learning_rate)

    pathnet = PathNet(input_shape=input_shape, width=width, depth=depth,
                      max_active_modules=20)
    pathnet._layers = layers
    pathnet._tasks = [task]
    pathnet.max_modules_pr_layer = max_modules_pr_layer
    pathnet.min_modules_pr_layer = min_modules_pr_layer

    # Snapshot the freshly initialised weights so modules can be reset later.
    for layer in pathnet._layers:
        layer.save_initialized_weights()

    return pathnet, task
def make_encoder_layers(input_size: int, latent_dim: int = 100,
                        hidden_channel: int = 128,
                        last_act: str = None) -> list:
    """Return the conv-layer stack of a DCGAN-style encoder.

    The three hidden layers are identical for both supported resolutions;
    only the input channel count and the final kernel size differ.

    :param input_size: 28 (MNIST/FashionMNIST, grayscale) or 32 (CIFAR-10, RGB)
    :param latent_dim: channel count of the final (latent) conv layer
    :param hidden_channel: base width of the hidden conv layers
    :param last_act: activation name for the final layer (None = linear)
    :raises ValueError: if ``input_size`` is not 28 or 32
    """
    if input_size == 28:    # mnist, fmnist: 1 channel, 3x3 final kernel
        in_channels, last_kernel = 1, 3
    elif input_size == 32:  # cifar10: 3 channels, 4x4 final kernel
        in_channels, last_kernel = 3, 4
    else:
        # Previously raised a bare ValueError with no message.
        raise ValueError(
            "unsupported input_size: {} (expected 28 or 32)".format(input_size))
    return [
        ConvLayer(in_channels, hidden_channel, 4, 2, 1, True, "leakyrelu"),
        ConvLayer(hidden_channel, hidden_channel * 2, 4, 2, 1, True, "leakyrelu"),
        ConvLayer(hidden_channel * 2, hidden_channel * 4, 4, 2, 1, True, "leakyrelu"),
        ConvLayer(hidden_channel * 4, latent_dim, last_kernel, 1, 0, False, last_act),
    ]
def make_discriminator_layers(input_size: int, hidden_channel: int = 128) -> list:
    """Return the conv-layer stack of a DCGAN-style discriminator.

    Mirrors ``make_encoder_layers`` but ends in a single-channel (logit)
    layer with no activation.

    :param input_size: 28 (MNIST/FashionMNIST, grayscale) or 32 (CIFAR-10, RGB)
    :param hidden_channel: base width of the hidden conv layers
    :raises ValueError: if ``input_size`` is not 28 or 32
    """
    if input_size == 28:    # mnist, fmnist: 1 channel, 3x3 final kernel
        in_channels, last_kernel = 1, 3
    elif input_size == 32:  # cifar10: 3 channels, 4x4 final kernel
        in_channels, last_kernel = 3, 4
    else:
        # Previously raised a bare ValueError with no message.
        raise ValueError(
            "unsupported input_size: {} (expected 28 or 32)".format(input_size))
    return [
        ConvLayer(in_channels, hidden_channel, 4, 2, 1, True, "leakyrelu"),
        ConvLayer(hidden_channel, hidden_channel * 2, 4, 2, 1, True, "leakyrelu"),
        ConvLayer(hidden_channel * 2, hidden_channel * 4, 4, 2, 1, True, "leakyrelu"),
        ConvLayer(hidden_channel * 4, 1, last_kernel, 1, 0, False),
    ]
def encoder(self, inputs):
    """Encode an image batch into FLAGS.code_size-dimensional vectors.

    Stack: conv -> conv+pool, three times (8 -> 8 -> 16 -> 16 -> 32 -> 32
    filters), then flatten and apply a single linear layer (no activation).

    :param inputs: image batch tensor [n, H, W, C]
    :return: (dense, last_conv_dims) — the code tensor and the pre-flatten
        feature-map shape, which the decoder needs to undo the reshape
    """
    # Layers are callable objects; `layer(x)` replaces the previous
    # non-idiomatic `layer.__call__(x)` spelling (identical behavior).
    conv1 = ConvLayer(input_filters=tf.cast(inputs.shape[3], tf.int32),
                      output_filters=8, act=tf.nn.relu, kernel_size=3,
                      kernel_stride=1, kernel_padding="SAME")
    conv1_act = conv1(inputs)
    print(conv1_act.shape)
    # convolutional and pooling layer
    conv_pool1 = ConvPoolLayer(input_filters=8, output_filters=8,
                               act=tf.nn.relu, kernel_size=3, kernel_stride=1,
                               kernel_padding="SAME", pool_size=3,
                               pool_stride=2, pool_padding="SAME")
    conv_pool1_act = conv_pool1(conv1_act)
    print(conv_pool1_act.shape)
    # convolutional layer
    conv2 = ConvLayer(input_filters=8, output_filters=16, act=tf.nn.relu,
                      kernel_size=3, kernel_stride=1, kernel_padding="SAME")
    conv2_act = conv2(conv_pool1_act)
    print(conv2_act.shape)
    # convolutional and pooling layer
    conv_pool2 = ConvPoolLayer(input_filters=16, output_filters=16,
                               act=tf.nn.relu, kernel_size=3, kernel_stride=1,
                               kernel_padding="SAME", pool_size=3,
                               pool_stride=2, pool_padding="SAME")
    conv_pool2_act = conv_pool2(conv2_act)
    print(conv_pool2_act.shape)
    conv3 = ConvLayer(input_filters=16, output_filters=32, act=tf.nn.relu,
                      kernel_size=3, kernel_stride=1, kernel_padding="SAME")
    conv3_act = conv3(conv_pool2_act)
    print(conv3_act.shape)
    conv_pool3 = ConvPoolLayer(input_filters=32, output_filters=32,
                               act=tf.nn.relu, kernel_size=3, kernel_stride=1,
                               kernel_padding="SAME", pool_size=3,
                               pool_stride=2, pool_padding="SAME")
    conv_pool3_act = conv_pool3(conv3_act)
    print(conv_pool3_act.shape)
    # Shape of the last feature maps (H, W, C) before flattening.
    last_conv_dims = conv_pool3_act.shape[1:]
    # make output of pooling flatten: [n, H*W*C]
    flatten = tf.reshape(conv_pool3_act,
                         [-1, last_conv_dims[0] * last_conv_dims[1] * last_conv_dims[2]])
    print(flatten.shape)
    weights_encoder = normal_initializer((tf.cast(flatten.shape[1], tf.int32),
                                          FLAGS.code_size))
    bias_encoder = zero_initializer((FLAGS.code_size))
    # apply fully connected layer — deliberately no activation on the code.
    dense = tf.matmul(flatten, weights_encoder) + bias_encoder
    print(dense.shape)
    return dense, last_conv_dims
def build_model(self):
    """Assemble a LeNet-style classifier and store its layers in self.layers."""
    layers = []
    input_shape = np.array(
        [self.batch_size, self.x_dim, self.x_dim, self.c_dim])
    # input layer ==> [n, 28, 28, 1]
    x = InputLayer(input_shape)
    layers.append(x)
    # conv layer [n, 28, 28, 1] ==> [n, 28, 28, 20]
    # (comments previously claimed 32/64 filters; the code uses 20/50)
    x = ConvLayer(x, output_nums=20, kernel=5, strides=1, padding='SAME',
                  name='conv1')
    layers.append(x)
    # max-pool layer [n, 28, 28, 20] ==> [n, 14, 14, 20]
    # BUG FIX: keyword was misspelled `paddind`, so the padding argument
    # never reached the layer (or raised a TypeError, depending on the
    # MaxPoolLayer signature).
    x = MaxPoolLayer(x, kernel=2, strides=2, padding='SAME', name='pool1')
    layers.append(x)
    # conv layer [n, 14, 14, 20] ==> [n, 14, 14, 50]
    x = ConvLayer(x, output_nums=50, kernel=5, strides=1, padding='SAME',
                  name='conv2')
    layers.append(x)
    # max-pool layer [n, 14, 14, 50] ==> [n, 7, 7, 50]
    x = MaxPoolLayer(x, kernel=2, strides=2, padding='SAME', name='pool2')
    layers.append(x)
    # flatten layer [n, 7, 7, 50] ==> [n, 7*7*50]
    x = FlattenLayer(x, name='flatten')
    layers.append(x)
    # fully connected layer [n, 2450] ==> [n, 500]
    x = DenseLayer(x, output_nums=500, name='dense1')
    layers.append(x)
    # relu layer [n, 500] ==> [n, 500]
    x = ReLULayer(x, name='relu1')
    layers.append(x)
    # fully connected layer [n, 500] ==> [n, 10]
    x = DenseLayer(x, output_nums=10, name='dense2')
    layers.append(x)
    # softmax layer [n, 10] ==> [n, 10]
    x = SoftMaxLayer(x, name='softmax')
    layers.append(x)
    self.layers = layers
def compile_make_fully_convolutional(nnet):
    """Build 7x7/1x1 conv replacements for the dense head of `nnet` and
    return a Theano function that copies the dense weights into them.

    The returned function, when called, performs the weight transfer
    (reshape + transpose of each dense W, plus bias copy) via Theano
    updates on shared variables.
    """
    # for naming convenience
    nnet.dense3_layer = nnet.svm_layer
    pad = 'valid'
    # Dense layers become convs: dense1 -> 7x7 conv over the last pool map,
    # dense2/dense3 -> 1x1 convs.
    nnet.dense1_conv_layer = ConvLayer(nnet.maxpool5_layer, num_filters=4096,
                                       filter_size=(7, 7), pad=pad,
                                       flip_filters=False)
    relu_ = ReLU(nnet.dense1_conv_layer)
    nnet.dense2_conv_layer = ConvLayer(relu_, num_filters=4096,
                                       filter_size=(1, 1), pad=pad,
                                       flip_filters=False)
    relu_ = ReLU(nnet.dense2_conv_layer)
    nnet.dense3_conv_layer = ConvLayer(relu_, num_filters=1000,
                                       filter_size=(1, 1), pad=pad,
                                       flip_filters=False)
    # Dense weight matrices are transposed, then reshaped to the conv
    # kernel shapes so each conv computes the same linear map.
    W_dense1_reshaped = \
        nnet.dense1_layer.W.T.reshape(nnet.dense1_conv_layer.W.shape)
    W_dense2_reshaped = \
        nnet.dense2_layer.W.T.reshape(nnet.dense2_conv_layer.W.shape)
    W_dense3_reshaped = \
        nnet.dense3_layer.W.T.reshape(nnet.dense3_conv_layer.W.shape)
    # Updates assign the reshaped weights and original biases to the new
    # conv layers' shared variables.
    updates = ((nnet.dense1_conv_layer.W, W_dense1_reshaped),
               (nnet.dense2_conv_layer.W, W_dense2_reshaped),
               (nnet.dense3_conv_layer.W, W_dense3_reshaped),
               (nnet.dense1_conv_layer.b, nnet.dense1_layer.b),
               (nnet.dense2_conv_layer.b, nnet.dense2_layer.b),
               (nnet.dense3_conv_layer.b, nnet.dense3_layer.b))
    return theano.function([], updates=updates)
def discriminator(self):
    """Return the forward pass over a 5-layer DCGAN-style discriminator.

    Four stride-2 convs (64 -> 128 -> 256 -> 512 filters) followed by a
    stride-1 single-filter conv; all layers use 4x4 kernels, SAME padding,
    truncated-normal init and leaky-relu, and all but the first are
    batch-normalised.
    """
    # (num_filters, stride, use_batch_norm) per layer, in order.
    specs = [
        (64, 2, False),
        (128, 2, True),
        (256, 2, True),
        (512, 2, True),
        (1, 1, True),
    ]
    disc = []
    for n_filters, stride, use_norm in specs:
        kwargs = dict(
            num_filters=n_filters,
            kernel_size=4,
            stride=stride,
            padding="SAME",
            weights_init=tf.truncated_normal_initializer(
                stddev=self.weight_stdev),
            activation=leakyrelu,
        )
        if use_norm:
            kwargs["normalizer"] = slim.batch_norm
        disc.append(ConvLayer(**kwargs))
    return forward(disc)
def __init__(self, reshaped_input, name='unnamed'):
    """Four-stage 1-D convolutional stack (Theano) ending in a tanh layer.

    :param reshaped_input: input already shaped (batch, 1, length, 1)
    :param name: prefix used to name the individual conv layers
    """
    self.name = name
    # Stage 1: 1 -> 2 maps, 4-tap filter, padded by (2, 0).
    self.conv_layer1 = ConvLayer(
        reshaped_input,
        filter_shape=(2, 1, 4, 1),  # num outs, num ins, size
        image_shape=(None, 1, None, 1),
        stride=(1, 1),
        name=self.name + '_conv1',
        border_mode=(2, 0),
        act_fn='relu')
    # Stage 2: 2 -> 4 maps, stride 2 downsamples along the time axis.
    self.conv_layer2 = ConvLayer(self.conv_layer1,
                                 filter_shape=(4, 2, 2, 1),
                                 image_shape=(None, 2, None, 1),
                                 stride=(2, 1),
                                 name=self.name + '_conv2',
                                 border_mode=(0, 0),
                                 act_fn='relu')
    # Stage 3: 4 -> 4 maps, 1x1 filter (per-position mixing).
    self.conv_layer3 = ConvLayer(self.conv_layer2,
                                 filter_shape=(4, 4, 1, 1),
                                 image_shape=(None, 4, None, 1),
                                 stride=(1, 1),
                                 name=self.name + '_conv3',
                                 border_mode=(0, 0),
                                 act_fn='relu')
    # Stage 4: collapse to a single map; tanh bounds the output.
    self.conv_layer4 = ConvLayer(self.conv_layer3,
                                 filter_shape=(1, 4, 1, 1),
                                 image_shape=(None, 4, None, 1),
                                 stride=(1, 1),
                                 name=self.name + '_conv4',
                                 border_mode=(0, 0),
                                 act_fn='tanh')
    self.output = self.conv_layer4.output
    self.layers = [
        self.conv_layer1, self.conv_layer2, self.conv_layer3,
        self.conv_layer4
    ]
    # Collect all trainable parameters from the stacked layers.
    self.params = []
    for l in self.layers:
        self.params += l.params
def add_a_random_conv_layer(self):
    """Sample a fresh ConvLayer with randomly initialised hyper-parameters.

    Filter side length, feature-map count, and the weight-init mean/std are
    each drawn from the corresponding `init_*` sampler on this object.
    """
    side = self.init_feature_size()
    n_maps = self.init_feature_map_size()
    mu = self.init_mean()
    sigma = self.init_std()
    return ConvLayer(filter_size=(side, side),
                     feature_map_size=n_maps,
                     weight_matrix=[mu, sigma])
def cifar10():
    """Construct a PathNet and its TaskContainer for the CIFAR-10 task.

    :return: (pathnet, task) tuple ready for training
    """
    conv_config = [{
        'channels': 3,
        'kernel': (3, 3),
        'stride': (1, 1),
        'activation': 'relu'
    }]
    dense_config = [{'out': 20, 'activation': 'relu'}]
    input_shape = [32, 32, 3]
    output_size = 10
    depth = 3
    width = 10
    max_modules_pr_layer = 3
    learning_rate = 0.001
    optimizer_type = Adam
    loss = 'categorical_crossentropy'

    # Three convolutional layers; the final one max-pools its output.
    layers = [
        ConvLayer(width, 'L0', conv_config),
        ConvLayer(width, 'L1', conv_config),
        ConvLayer(width, 'L2', conv_config, maxpool=True),
    ]
    Layer.initialize_whole_network(layers, input_shape)

    task = TaskContainer(input_shape, output_size, True,
                         name='unique_cifar10', optimizer=optimizer_type,
                         loss=loss, lr=learning_rate)

    pathnet = PathNet(input_shape=input_shape, width=width, depth=depth)
    pathnet._layers = layers
    pathnet._tasks = [task]
    pathnet.max_modules_pr_layer = max_modules_pr_layer

    # Snapshot the freshly initialised weights so modules can be reset later.
    for layer in pathnet._layers:
        layer.save_initialized_weights()

    return pathnet, task
def __init__(self, iterations=1, learning_rate=0.5,
             topo=[('c', 3, 4), ('p', 2), ('c', 3, 4), ('p', 9),
                   ('mlp', 4, 4, 2)],
             activation_func=(np.tanh, nputils.tanh_deriv)):
    """
    Creates a new convolutional neural network with the given topology
    (architecture), learning rate and number of iterations.

    :param iterations: number of iterations for training.
    :param learning_rate: rate for updating the weights
    :param topo: defines the architecture of the net. It is a list of
    tuples. Each tuple represents a layer, where the first element is a
    character that specifies the type of layer. E.g. 'c' convolutional
    layer, 'p' pooling layer, 'mlp' fully connected conventional neural
    network. The next elements in the tuple are layer specific.
    Convolutional: 2nd element defines the kernel size, e.g. 3 for a 3x3
    kernel. 3rd element specifies the number of maps in the layer.
    Pooling: 2nd element defines the pool patch size, e.g. 2 for a pool
    patch size of 2x2. MLP: each element defines the layer size for the
    network. A complete example looks like this: [('c', 3, 4), ('p', 2),
    ('c', 3, 4), ('p', 9), ('mlp', 4, 4, 2)]
    """
    # NOTE(review): `topo` is a mutable default argument; safe only as long
    # as no caller mutates it.
    self.split_ratio = 0.8
    self.iterations = iterations
    self.learning_rate = learning_rate
    self.layers = []
    self.activ_func = activation_func[0]
    # NOTE(review): attribute name keeps the original misspelling
    # (`deriv_acitv_func`) because external code may reference it.
    self.deriv_acitv_func = activation_func[1]
    num_prev_maps = 1
    self.topo = topo
    # parse topology: translate each tuple into a layer object
    for layer in topo:
        # convolutional layer: ('c', kernel_size, num_maps)
        if layer[0] == 'c':
            conv_layer = ConvLayer(num_prev_maps=num_prev_maps,
                                   kernel_size=layer[1],
                                   num_maps=layer[2])
            self.add_layer(conv_layer)
            # the next layer sees this layer's maps as its input channels
            num_prev_maps = layer[2]
        # pooling layer: ('p', patch_size)
        elif layer[0] == 'p':
            self.add_layer(MaxPoolLayer(layer[1], num_prev_maps))
        # multilayer perceptron: ('mlp', size_1, ..., size_n)
        elif layer[0] == 'mlp':
            self.mlp = MultilayerPerceptron(
                list(layer[1:]),
                do_classification=True,
                update_method=SimpleUpdate(self.learning_rate),
                activ_func=(self.activ_func, self.deriv_acitv_func))
def get_active_subnet(self, in_channel, preserve_weight=True):
    """Materialise a static ConvLayer for the currently active width.

    :param in_channel: number of input channels of the extracted layer
    :param preserve_weight: if True, copy the corresponding slice of this
        elastic layer's weights (and BN statistics) into the new layer
    :return: the extracted ConvLayer, placed on the same device as self
    """
    sub_layer = ConvLayer(
        in_channel, self.active_out_channel, self.kernel_size, self.stride, self.dilation,
        use_bn=self.use_bn, act_func=self.act_func
    )
    sub_layer = sub_layer.to(get_net_device(self))
    if not preserve_weight:
        return sub_layer
    # Copy only the top-left [active_out_channel, in_channel] slice of the
    # full (elastic) kernel into the static layer.
    sub_layer.conv.weight.data.copy_(self.conv.conv.weight.data[:self.active_out_channel, :in_channel, :, :])
    if self.use_bn:
        copy_bn(sub_layer.bn, self.bn.bn)
    return sub_layer
def mutate_conv_unit(self, unit, eta):
    """Return a mutated copy of a conv unit using polynomial mutation (pm).

    Mutates filter width, feature-map count, and the weight-init mean/std,
    each bounded by the corresponding range attribute.

    :param unit: the conv unit to mutate
    :param eta: distribution index of the polynomial mutation
    """
    # feature map size, feature map number, mean std
    fms = unit.filter_width
    fmn = unit.feature_map_size
    mean = unit.weight_matrix_mean
    std = unit.weight_matrix_std
    # NOTE(review): this range uses [0]/[-1] while the others use [0]/[1] —
    # equivalent only for 2-element ranges; confirm intent.
    new_fms = int(
        self.pm(self.filter_size_range[0], self.filter_size_range[-1], fms,
                eta))
    # NOTE(review): attribute name `featur_map_size_range` keeps the original
    # misspelling; it must match the attribute defined elsewhere.
    new_fmn = int(
        self.pm(self.featur_map_size_range[0], self.featur_map_size_range[1],
                fmn, eta))
    new_mean = self.pm(self.mean_range[0], self.mean_range[1], mean, eta)
    new_std = self.pm(self.std_range[0], self.std_range[1], std, eta)
    conv_layer = ConvLayer(filter_size=[new_fms, new_fms],
                           feature_map_size=new_fmn,
                           weight_matrix=[new_mean, new_std])
    return conv_layer
def addConvLayer(self, use_batch_norm=False, **kwargs):
    """
    Append a convolutional layer to the network. When `use_batch_norm` is
    True the conv layer is immediately followed by a batch-normalization
    layer. Extra keyword arguments are forwarded to ConvLayer.
    """
    # Feed from the last layer if any exist, otherwise from the input layer.
    if self.all_layers:
        prev = self.all_layers[-1]
    else:
        prev = self.input_layer
    self.n_conv_layers += 1
    conv = ConvLayer(prev, name="conv%i" % self.n_conv_layers, **kwargs)
    self.all_layers += (conv, )
    self.trainable_layers += (conv, )
    if use_batch_norm:
        self.n_bn_layers += 1
        self.all_layers += (BatchNorm(conv, name="bn%i" % self.n_bn_layers), )
def __init__(self, layers):
    """Assemble the network from a list of layer-config dicts.

    Each dict must contain a "type" key (popped here, so the caller's dicts
    are mutated); the remaining keys are passed to the layer constructor.

    :raises NotImplementedError: on an unrecognised layer type
    """
    # Layer types that take their remaining config as keyword arguments.
    factories = {
        "data": DataLayer,
        "conv": ConvLayer,
        "pool": PoolLayer,
        "dense": DenseLayer,
        "loss": LossLayer,
    }
    self._network = []
    for spec in layers:
        kind = spec.pop("type")
        if kind == "relu":
            # relu takes no configuration
            self._network.append(ReLULayer())
        elif kind in factories:
            self._network.append(factories[kind](**spec))
        else:
            raise NotImplementedError(
                "Layer type: {0} not found".format(kind))
    self.initialize()
def encoder(self, inputs):
    """Convolutional encoder: three (conv -> conv+pool) pairs, then a
    linear projection down to FLAGS.code_size features.

    All convs: 3x3 kernel, stride 1, SAME padding. All pools: 3x3 window,
    stride 2, SAME padding. Filter counts:
    FLAGS.num_channel -> 8 -> 8 -> 16 -> 16 -> 32 -> 32.

    :param inputs: image batch tensor [n, H, W, C]
    :return: (dense, last_conv_dims) — the linear code (no activation) and
        the pre-flatten feature-map shape needed by the decoder
    """
    relu = tf.nn.relu
    # convolutional layer
    cl1 = ConvLayer(FLAGS.num_channel, 8, relu, 3, 1, 'SAME')
    conv1 = cl1(inputs)
    print(conv1.shape)
    # convolutional and pooling layer
    cl2 = ConvPoolLayer(8, 8, relu, 3, 1, 'SAME', 3, 2, 'SAME')
    conv_pool1 = cl2(conv1)
    print(conv_pool1.shape)
    # convolutional layer
    cl3 = ConvLayer(8, 16, relu, 3, 1, 'SAME')
    conv2 = cl3(conv_pool1)
    print(conv2.shape)
    # convolutional and pooling layer
    cl4 = ConvPoolLayer(16, 16, relu, 3, 1, 'SAME', 3, 2, 'SAME')
    conv_pool2 = cl4(conv2)
    print(conv_pool2.shape)
    cl5 = ConvLayer(16, 32, relu, 3, 1, 'SAME')
    conv3 = cl5(conv_pool2)
    print(conv3.shape)
    cl6 = ConvPoolLayer(32, 32, tf.nn.relu, 3, 1, 'SAME', 3, 2, 'SAME')
    conv_pool3 = cl6(conv3)
    print(conv_pool3.shape)

    # Flatten the [H, W, C] feature maps and apply one linear layer
    # (normal-initialized weights, zero bias, no activation).
    dim = np.prod(conv_pool3.shape[1:])
    flatten = tf.reshape(conv_pool3, [-1, dim])
    print(flatten.shape)
    # int(dim) replaces the non-idiomatic dim.__int__() — same conversion.
    W_fc = normal_initializer(shape=(int(dim), FLAGS.code_size))
    B_fc = zero_initializer(shape=FLAGS.code_size)
    dense = tf.matmul(flatten, W_fc) + B_fc
    print(dense.shape)

    last_conv_dims = conv_pool3.shape[1:]
    return dense, last_conv_dims
def __init__(self, rng, params, cost_function='mse', optimizer=RMSprop):
    """Four-conv + two-dense regression CNN (Theano).

    :param rng: RandomState used to initialise the layer weights
    :param params: dict with "lr", "batch_size", "seq_length", "n_output"
    :param cost_function: key understood by get_err_fn (e.g. 'mse')
    :param optimizer: optimizer class, instantiated as optimizer(cost, params, lr)
    """
    lr = params["lr"]
    batch_size = params["batch_size"]
    sequence_length = params["seq_length"]  # NOTE(review): unused below

    # minibatch
    X = T.matrix(name="input", dtype=dtype)   # batch of sequence of vector
    Y = T.matrix(name="output", dtype=dtype)  # batch of sequence of vector
    is_train = T.iscalar(
        'is_train'
    )  # pseudo boolean for switching between training and prediction

    # CNN global parameters.
    subsample = (1, 1)
    p_1 = 0.5
    border_mode = "same"
    cnn_batch_size = batch_size
    pool_size = (2, 2)

    # Layer1: conv2+pool+drop
    filter_shape = (128, 1, 10, 10)
    input_shape = (cnn_batch_size, 1, 144,
                   176)  # input_shape= (samples, channels, rows, cols)
    input = X.reshape(input_shape)
    c1 = ConvLayer(rng, input, filter_shape, input_shape, border_mode,
                   subsample, activation=nn.relu)
    p1 = PoolLayer(c1.output, pool_size=pool_size,
                   input_shape=c1.output_shape)
    dl1 = DropoutLayer(rng, input=p1.output, prob=p_1, is_train=is_train)

    # Layer2: conv2+pool
    subsample = (1, 1)
    filter_shape = (256, p1.output_shape[1], 3, 3)
    c2 = ConvLayer(rng, dl1.output, filter_shape, p1.output_shape,
                   border_mode, subsample, activation=nn.relu)
    p2 = PoolLayer(c2.output, pool_size=pool_size,
                   input_shape=c2.output_shape)

    # Layer3: conv2+pool
    filter_shape = (256, p2.output_shape[1], 3, 3)
    c3 = ConvLayer(rng, p2.output, filter_shape, p2.output_shape,
                   border_mode, subsample, activation=nn.relu)
    p3 = PoolLayer(c3.output, pool_size=pool_size,
                   input_shape=c3.output_shape)

    # Layer4: conv2+pool
    filter_shape = (128, p3.output_shape[1], 3, 3)
    c4 = ConvLayer(rng, p3.output, filter_shape, p3.output_shape,
                   border_mode, subsample, activation=nn.relu)
    p4 = PoolLayer(c4.output, pool_size=pool_size,
                   input_shape=c4.output_shape)

    # Layer5: hidden
    n_in = reduce(lambda x, y: x * y, p4.output_shape[1:])
    x_flat = p4.output.flatten(2)
    h1 = HiddenLayer(rng, x_flat, n_in, 1024, activation=nn.relu)

    # Layer6: output regression
    lreg = LogisticRegression(rng, h1.output, 1024, params['n_output'])
    self.output = lreg.y_pred

    self.params = c1.params + c2.params + c3.params + c4.params + h1.params + lreg.params
    cost = get_err_fn(self, cost_function, Y)

    # L2 weight decay over every parameter tensor.
    # BUG FIX: self.params is a flat list of parameter tensors, so each
    # tensor must be squared and summed directly. The previous
    # param[0]**2 + param[1]**2 indexed INTO each tensor (its first two
    # rows) instead of regularising it; the sibling CNN-LSTM model in this
    # file already uses T.sum(param ** 2).
    L2_reg = 0.0001
    L2_sqr = theano.shared(0.)
    for param in self.params:
        L2_sqr += T.sum(param ** 2)
    cost += L2_reg * L2_sqr

    _optimizer = optimizer(cost, self.params, lr=lr)
    self.train = theano.function(inputs=[X, Y, is_train],
                                 outputs=cost,
                                 updates=_optimizer.getUpdates(),
                                 allow_input_downcast=True)
    self.predictions = theano.function(inputs=[X, is_train],
                                       outputs=self.output,
                                       allow_input_downcast=True)
    self.n_param = count_params(self.params)
def encoder(self, inputs):
    """Convolutional encoder: conv / conv+pool pairs, then a linear map.

    Layer plan: ConvLayer1 -> ConvPoolLayer1 -> ConvLayer2 -> ConvPoolLayer2
    -> ConvLayer3 -> ConvPoolLayer3. All convs: size 3, stride 1, SAME;
    all pools: size 3, stride 2, SAME. Filter counts:
    FLAGS.num_channel -> 8 -> 8 -> 16 -> 16 -> 32 -> 32.
    Returns (dense, last_conv_dims): the FLAGS.code_size-dim code (no
    activation) and the pre-flatten feature-map shape for the decoder.
    """
    # convolutional layer
    relu = tf.nn.relu
    conv1 = ConvLayer(input_filters=FLAGS.num_channel, output_filters=8,
                      act=relu, kernel_size=3, kernel_stride=1,
                      kernel_padding='SAME')(inputs)
    print(conv1.shape)
    # convolutional and pooling layer
    conv_pool1 = ConvPoolLayer(input_filters=8, output_filters=8, act=relu,
                               kernel_size=3, kernel_stride=1,
                               kernel_padding='SAME', pool_size=3,
                               pool_stride=2, pool_padding='SAME')(conv1)
    print(conv_pool1.shape)
    # convolutional layer
    conv2 = ConvLayer(input_filters=8, output_filters=16, act=relu,
                      kernel_size=3, kernel_stride=1,
                      kernel_padding='SAME')(conv_pool1)
    print(conv2.shape)
    # convolutional and pooling layer
    conv_pool2 = ConvPoolLayer(input_filters=16, output_filters=16, act=relu,
                               kernel_size=3, kernel_stride=1,
                               kernel_padding='SAME', pool_size=3,
                               pool_stride=2, pool_padding='SAME')(conv2)
    print(conv_pool2.shape)
    conv3 = ConvLayer(input_filters=16, output_filters=32, act=relu,
                      kernel_size=3, kernel_stride=1,
                      kernel_padding='SAME')(conv_pool2)
    print(conv3.shape)
    conv_pool3 = ConvPoolLayer(input_filters=32, output_filters=32, act=relu,
                               kernel_size=3, kernel_stride=1,
                               kernel_padding='SAME', pool_size=3,
                               pool_stride=2, pool_padding='SAME')(conv3)
    print(conv_pool3.shape)

    # make output of pooling flatten
    last_conv_dims = conv_pool3.shape[1:]
    # NOTE(review): np.prod over TF Dimension objects — assumed to coerce
    # to an int for the reshape below; confirm under the TF version in use.
    flatten_dim = np.prod(last_conv_dims)
    flatten = tf.reshape(conv_pool3, [tf.shape(conv_pool3)[0], flatten_dim])
    print(flatten.shape)
    # apply fully connected layer: normal-init weights, zero bias,
    # deliberately no activation on the code.
    dense = tf.matmul(flatten, normal_initializer([
        flatten_dim, FLAGS.code_size
    ])) + zero_initializer([FLAGS.code_size])
    print(dense.shape)
    return dense, last_conv_dims
def _conv_layer(self, NHWC_X, M, feature_map, filter_size, stride, layer_params=None):
    """Build one convolutional GP layer (gpflow) for input images NHWC_X.

    :param NHWC_X: input images, NHWC layout
    :param M: number of inducing patches to sample when no saved params
    :param feature_map: number of output feature maps (GP count)
    :param filter_size: side length of the square patch/filter
    :param stride: patch stride
    :param layer_params: optional dict of saved parameters (Z, q_mu,
        q_sqrt, base-kernel hyperparameters) to restore
    :return: (conv_layer, H_X) — the layer and the identity-conv features
    """
    if layer_params is None:
        layer_params = {}
    NHWC = NHWC_X.shape
    view = FullView(input_size=NHWC[1:3],
                    filter_size=filter_size,
                    feature_maps=NHWC[3],
                    stride=stride)
    # Mean function: identity-style conv mean if requested, else zero.
    if self.flags.identity_mean:
        conv_mean = Conv2dMean(filter_size, NHWC[3], feature_map,
                               stride=stride)
    else:
        conv_mean = gpflow.mean_functions.Zero()
    # The mean function is kept fixed (not optimised).
    # NOTE(review): placement relative to the if/else reconstructed from
    # collapsed source — confirm it applies to both branches.
    conv_mean.set_trainable(False)
    output_shape = image_HW(view.patch_count) + [feature_map]
    H_X = identity_conv(NHWC_X, filter_size, NHWC[3], feature_map, stride)
    # Inducing patches: sample from the images unless saved Z is provided.
    if len(layer_params) == 0:
        conv_features = PatchInducingFeatures.from_images(
            NHWC_X, M, filter_size)
    else:
        conv_features = PatchInducingFeatures(layer_params.get('Z'))
    patch_length = filter_size**2 * NHWC[3]
    if self.flags.base_kernel == 'rbf':
        # Fall back to 5.0 for both hyperparameters when not restoring.
        lengthscales = layer_params.get('base_kernel/lengthscales', 5.0)
        variance = layer_params.get('base_kernel/variance', 5.0)
        base_kernel = kernels.RBF(patch_length,
                                  variance=variance,
                                  lengthscales=lengthscales)
    elif self.flags.base_kernel == 'acos':
        base_kernel = kernels.ArcCosine(patch_length, order=0)
    else:
        raise ValueError("Not a valid base-kernel value")
    q_mu = layer_params.get('q_mu')
    q_sqrt = layer_params.get('q_sqrt')
    conv_layer = ConvLayer(base_kernel=base_kernel,
                           mean_function=conv_mean,
                           feature=conv_features,
                           view=view,
                           white=self.flags.white,
                           gp_count=feature_map,
                           q_mu=q_mu,
                           q_sqrt=q_sqrt)
    if q_sqrt is None:
        # Start with low variance.
        conv_layer.q_sqrt = conv_layer.q_sqrt.value * 1e-5
    return conv_layer, H_X
def __init__(self, n_classe): super(MultiView, self).__init__() # Layer 1 self.layer1 = nn.Sequential(OrderedDict([ ('conv1', ConvLayer(3, out_channels=32, filter_size=(3,3), stride=(2,2))) ])) # Layer 2 self.layer2 = nn.Sequential(OrderedDict([ ('conv2a', ConvLayer(32, out_channels=64, filter_size=(3, 3), stride=(2, 2))), ('conv2b', ConvLayer(64, out_channels=64, filter_size=(3, 3), stride=(1, 1))), ('conv2c', ConvLayer(64, out_channels=64, filter_size=(3, 3), stride=(1, 1))), ])) # Layer 3 self.layer3 = nn.Sequential(OrderedDict([ ('conv3a', ConvLayer(64, out_channels=128, filter_size=(3, 3), stride=(1, 1))), ('conv3b', ConvLayer(128, out_channels=128, filter_size=(3, 3), stride=(1, 1))), ('conv3c', ConvLayer(128, out_channels=128, filter_size=(3, 3), stride=(1, 1))), ])) # Layer 4 self.layer4 = nn.Sequential(OrderedDict([ ('conv4a', ConvLayer(128, out_channels=256, filter_size=(3, 3), stride=(1, 1))), ('conv4b', ConvLayer(256, out_channels=256, filter_size=(3, 3), stride=(1, 1))), ('conv4c', ConvLayer(256, out_channels=256, filter_size=(3, 3), stride=(1, 1))), ])) # Layer 5 self.layer5 = nn.Sequential(OrderedDict([ ('conv5a', ConvLayer(256, out_channels=512, filter_size=(3, 3), stride=(1, 1))), ('conv5b', ConvLayer(512, out_channels=512, filter_size=(3, 3), stride=(1, 1))), ('conv5c', ConvLayer(512, out_channels=512, filter_size=(3, 3), stride=(1, 1))), ])) # FC self.max_pool = MaxPooling() self.avg_pool = AvgPooling() self.dropout = nn.Dropout(p=0.5) self.fc1 = nn.Linear(512 * 2, 512 * 2) self.fc2 = nn.Linear(512 * 2, n_classe)
def __init__(self,rng,params,cost_function='mse',optimizer = RMSprop):
    # CNN front-end (3 conv/pool stages + dense layer) feeding an LSTM over
    # the sequence axis, built as Theano symbolic graphs and compiled into
    # self.train / self.predictions functions.
    # params: dict with "lr", "n_hidden", "n_output", "batch_size", "seq_length".
    lr=params["lr"]
    n_lstm=params['n_hidden']
    n_out=params['n_output']
    batch_size=params["batch_size"]
    sequence_length=params["seq_length"]
    X = T.tensor3() # batch of sequence of vector
    Y = T.tensor3() # batch of sequence of vector
    is_train = T.iscalar('is_train') # pseudo boolean for switching between training and prediction
    #CNN global parameters.
    subsample=(1,1)
    p_1=0.5
    border_mode="valid"
    # All frames of all sequences are folded into one big CNN batch.
    cnn_batch_size=batch_size*sequence_length
    pool_size=(2,2)
    #Layer1: conv2+pool+drop
    filter_shape=(64,1,9,9)
    input_shape=(cnn_batch_size,1,120,60) #input_shape= (samples, channels, rows, cols)
    input= X.reshape(input_shape)
    c1=ConvLayer(rng, input,filter_shape, input_shape,border_mode,subsample, activation=nn.relu)
    p1=PoolLayer(c1.output,pool_size=pool_size,input_shape=c1.output_shape)
    dl1=DropoutLayer(rng,input=p1.output,prob=p_1,is_train=is_train)
    #Layer2: conv2+pool
    filter_shape=(128,p1.output_shape[1],3,3)
    c2=ConvLayer(rng, dl1.output, filter_shape,p1.output_shape,border_mode,subsample, activation=nn.relu)
    p2=PoolLayer(c2.output,pool_size=pool_size,input_shape=c2.output_shape)
    #Layer3: conv2+pool
    filter_shape=(128,p2.output_shape[1],3,3)
    c3=ConvLayer(rng, p2.output,filter_shape,p2.output_shape,border_mode,subsample, activation=nn.relu)
    p3=PoolLayer(c3.output,pool_size=pool_size,input_shape=c3.output_shape)
    #Layer4: hidden
    # Flatten all feature maps of the last pooling stage into one vector.
    n_in= reduce(lambda x, y: x*y, p3.output_shape[1:])
    x_flat = p3.output.flatten(2)
    h1=HiddenLayer(rng,x_flat,n_in,1024,activation=nn.relu)
    n_in=1024
    # Unfold CNN features back into (batch, sequence, features) for the LSTM.
    rnn_input = h1.output.reshape((batch_size,sequence_length, n_in))
    #Layer5: LSTM
    self.n_in = n_in
    self.n_lstm = n_lstm
    self.n_out = n_out
    # Output projection applied to the LSTM output at every time step.
    self.W_hy = init_weight((self.n_lstm, self.n_out), rng=rng,name='W_hy', sample= 'glorot')
    self.b_y = init_bias(self.n_out,rng=rng, sample='zero')
    layer1=LSTMLayer(rng,0,self.n_in,self.n_lstm)
    self.params = layer1.params
    self.params.append(self.W_hy)
    self.params.append(self.b_y)
    def step_lstm(x_t,h_tm1,c_tm1):
        # One recurrence step: advance hidden/cell state, project to output.
        [h_t,c_t,y_t]=layer1.run(x_t,h_tm1,c_tm1)
        y = T.dot(y_t, self.W_hy) + self.b_y
        return [h_t,c_t,y]
    H = T.matrix(name="H",dtype=dtype) # initial hidden state
    C = T.matrix(name="C",dtype=dtype) # initial hidden state
    # theano.scan iterates over the leading axis, so move time to the front.
    [h_t,c_t,y_vals], _ = theano.scan(fn=step_lstm,
                                      sequences=[rnn_input.dimshuffle(1,0,2)],
                                      outputs_info=[H, C, None])
    # Back to (batch, time, output).
    self.output = y_vals.dimshuffle(1,0,2)
    self.params =c1.params+c2.params+c3.params+h1.params+self.params
    cost=get_err_fn(self,cost_function,Y)
    # L2 weight decay applied over every parameter in the model.
    L2_reg=0.0001
    L2_sqr = theano.shared(0.)
    for param in self.params:
        L2_sqr += (T.sum(param ** 2))
    cost += L2_reg*L2_sqr
    _optimizer = optimizer(cost, self.params, lr=lr)
    # Both compiled functions also return the final hidden/cell states so the
    # caller can carry state across calls.
    self.train = theano.function(inputs=[X,Y,is_train,H,C],outputs=[cost,h_t[-1],c_t[-1]],updates=_optimizer.getUpdates(),allow_input_downcast=True)
    self.predictions = theano.function(inputs = [X,is_train,H,C], outputs = [self.output,h_t[-1],c_t[-1]],allow_input_downcast=True)
    self.n_param=count_params(self.params)
def __init__(self, ch_in=1, ch_init=128, n_convs=4, n_classes=2, attention=None):
    """High-pass-filter CNN: HPF preproc, feature extraction, 12 conv stages,
    and a classification head.

    Args:
        ch_in: number of input channels.
        ch_init: base channel width; later stages use 2x/4x/8x/16x of it.
        n_convs: conv count inside the feature-extraction layer.
        n_classes: number of output classes.
        attention: NOTE(review) — accepted but not referenced below; the
            'sqex' attention placements are hard-coded. Confirm intent.
    """
    super(HpfConvNet, self).__init__()
    self.layer0_preproc = HPF(ch_in=ch_in, ch_out=4, kernel_size=5, padding=2)
    self.layer1_fe_type1 = FeatureExtractionLayer(4, ch_init, n_convs)
    # (layer index, in-width multiplier, out-width multiplier, stride, attention)
    # Width multipliers scale ch_init; stride-2 stages double the width.
    conv_specs = [
        (2, 1, 2, 1, None),
        (3, 2, 2, 1, None),
        (4, 2, 2, 1, 'sqex'),
        (5, 2, 4, 2, None),
        (6, 4, 4, 1, None),
        (7, 4, 4, 1, 'sqex'),
        (8, 4, 8, 2, None),
        (9, 8, 8, 1, None),
        (10, 8, 8, 1, 'sqex'),
        (11, 8, 16, 2, None),
        (12, 16, 16, 1, None),
        (13, 16, 16, 1, 'sqex'),
    ]
    for idx, m_in, m_out, step, attn in conv_specs:
        # Only pass non-default kwargs so the calls match the originals exactly.
        extra = {}
        if step != 1:
            extra['stride'] = step
        if attn is not None:
            extra['attention'] = attn
        setattr(self, 'layer%d_conv_type1' % idx,
                ConvLayer(m_in * ch_init, m_out * ch_init, **extra))
    self.layer14_cls_type1 = ClassificationLayer(16 * ch_init, n_classes=n_classes)
    self.initialize_parameters()
def conv_layer(self, dtype,
               N, C, K,
               D=1, H=1, W=1,
               T=1, R=1, S=1,
               pad_d=0, pad_h=0, pad_w=0,
               str_d=1, str_h=1, str_w=1,
               grid_P=0, grid_Q=0,
               update_size=None):
    """Construct a ConvLayer parameter object for later convolution ops.

    The returned object is passed as an argument to all convolution
    operations; nothing is executed here.

    Args:
        dtype: element dtype — selects the kernels and their parameters.
        N: mini-batch size.
        C: input feature maps.
        K: output feature maps.
        D, H, W: input image depth, height, width.
        T, R, S: filter kernel depth, height, width.
        pad_d, pad_h, pad_w: zero-padding per spatial edge.
        str_d, str_h, str_w: filter strides per spatial direction.
        grid_P, grid_Q: update-op work-distribution grid across SMs.
            Smaller grids accumulate deeper in fp32 (more accurate); larger
            grids spread work more evenly but need more fp16 accumulation.
            (1, 1) gives full fp32 accuracy; (P, Q) maximal SM spread;
            (0, 0) auto-balances (recommended). These strongly affect L2
            cache behavior and therefore performance.
        update_size: override kernel-size selection for the update op:
            "C64_K64" (fp16 only), "C128_K64" (fp32 only),
            "C128_K128" (both).

    Peak utilization is reached when N, K and C*R*S*T are multiples of 64.
    """
    return ConvLayer(self, dtype,
                     N, C, K,
                     D, H, W,
                     T, R, S,
                     pad_d, pad_h, pad_w,
                     str_d, str_h, str_w,
                     grid_P, grid_Q,
                     update_size)
def encoder(self, inputs):
    """Convolutional encoder: three (conv -> conv+pool) stages, then a dense code.

    Layer settings:
      - every ConvLayer / conv part: 3x3 filter, stride 1, SAME padding;
      - every pooling part: 3x3 pool, stride 2, SAME padding.
    Channel progression: FLAGS.num_channel -> 8 -> 8 -> 16 -> 16 -> 32 -> 32.

    Returns:
        (dense, last_conv_dims): the FLAGS.code_size-dim code, and the static
        spatial dims (4, 4, 32) of the last conv output as an int32 constant.
    """
    net = inputs
    in_ch = FLAGS.num_channel
    for out_ch in (8, 16, 32):
        # Plain convolutional layer.
        conv = ConvLayer(input_filters=in_ch, output_filters=out_ch,
                         act=tf.nn.relu, kernel_size=3, kernel_stride=1,
                         kernel_padding='SAME')(inputs=net)
        print(conv.shape)
        # Convolution followed by pooling (spatial downsampling by 2).
        net = ConvPoolLayer(input_filters=out_ch, output_filters=out_ch,
                            act=tf.nn.relu, kernel_size=3, kernel_stride=1,
                            kernel_padding='SAME', pool_size=3,
                            pool_stride=2, pool_padding='SAME')(inputs=conv)
        print(net.shape)
        in_ch = out_ch
    # Flatten the final feature map. The spatial extent is pinned statically
    # to (4, 4, 32) rather than read from tf.shape(net)[1:].
    batch_size = tf.shape(net)[0]
    last_conv_dims = tf.constant(value=(4, 4, 32), dtype=tf.int32, shape=(3, ))
    flat_size = tf.reduce_prod(last_conv_dims)
    flatten = tf.reshape(net, shape=[batch_size, flat_size])
    print(flatten.shape)
    # Fully connected projection down to the code size.
    W_Trans = normal_initializer(shape=[flat_size, FLAGS.code_size])
    B_Trans = zero_initializer(shape=[FLAGS.code_size])
    dense = tf.nn.xw_plus_b(flatten, W_Trans, B_Trans)
    print(dense.shape)
    return dense, last_conv_dims
def __init__(self, mode=None, n_bits=16, ch_in=1, ch_init=128, n_convs=4,
             n_classes=2, attention="sqex", conv_type="stdz"):
    """Bitplane-separation network with a mode-selected front-end and a fixed
    12-stage conv trunk plus classification head.

    Args:
        mode: front-end variant — one of "bsn", "bsn-hpf", "bsn-hpf-tlu",
            "bsn-nobs" (case-insensitive). Required; raises ValueError if
            missing or unrecognized.
        n_bits: bitplanes produced by BitplaneSeparation ("bsn" mode).
        ch_in: input channels.
        ch_init: base channel width; the trunk scales it 2x/4x/8x/16x.
        n_convs: conv count inside the feature-extraction layer.
        n_classes: number of output classes.
        attention: attention type used at the designated trunk stages.
        conv_type: convolution variant used throughout the trunk.
    """
    super().__init__()
    # BUGFIX: mode defaults to None, and None.lower() raised AttributeError
    # before the intended ValueError could fire. Validate first.
    if mode is None:
        raise ValueError("mode should be designated: %s" % (mode))
    mode = mode.lower()
    if mode == "bsn":
        self.layer0 = BitplaneSeparation(ch_in=ch_in, n_bits=n_bits,
                                         no_weight=True, inv_weight=False)
        self.layer1_fe = FeatureExtractionLayer(n_bits, ch_init, n_convs)
    elif mode == "bsn-hpf":
        self.layer0 = HPF(ch_in, 4, 5, padding=2)
        self.layer1_fe = FeatureExtractionLayer(4, ch_init, n_convs)
    elif mode == "bsn-hpf-tlu":
        self.layer0 = nn.Sequential(HPF(ch_in, 4, 5, padding=2),
                                    nn.Conv1d(4, 8, 1), TLU(3.0))
        self.layer1_fe = FeatureExtractionLayer(8, ch_init, n_convs)
    elif mode == "bsn-nobs":
        self.layer0 = nn.Conv1d(1, 8, 1)
        self.layer1_fe = FeatureExtractionLayer(8, ch_init, n_convs)
    else:
        raise ValueError("mode should be designated: %s" % (mode))
    self._name = mode
    # Conv trunk: width doubles at each stride-2 stage; attention at the
    # last stage of each width group.
    self.layer2_conv = ConvLayer(ch_init, 2 * ch_init, conv_type=conv_type)
    self.layer3_conv = ConvLayer(2 * ch_init, 2 * ch_init, conv_type=conv_type)
    self.layer4_conv = ConvLayer(2 * ch_init, 2 * ch_init,
                                 conv_type=conv_type, attention=attention)
    self.layer5_conv = ConvLayer(2 * ch_init, 4 * ch_init, stride=2,
                                 conv_type=conv_type)
    self.layer6_conv = ConvLayer(4 * ch_init, 4 * ch_init, conv_type=conv_type)
    self.layer7_conv = ConvLayer(4 * ch_init, 4 * ch_init,
                                 conv_type=conv_type, attention=attention)
    self.layer8_conv = ConvLayer(4 * ch_init, 8 * ch_init, stride=2,
                                 conv_type=conv_type)
    self.layer9_conv = ConvLayer(8 * ch_init, 8 * ch_init, conv_type=conv_type)
    self.layer10_conv = ConvLayer(8 * ch_init, 8 * ch_init,
                                  conv_type=conv_type, attention=attention)
    self.layer11_conv = ConvLayer(8 * ch_init, 16 * ch_init, stride=2,
                                  conv_type=conv_type)
    self.layer12_conv = ConvLayer(16 * ch_init, 16 * ch_init,
                                  conv_type=conv_type)
    self.layer13_conv = ConvLayer(16 * ch_init, 16 * ch_init,
                                  conv_type=conv_type, attention=attention)
    self.layer14_cls = ClassificationLayer(16 * ch_init, n_classes=n_classes)
    self.initialize_parameters()
def __init__(self, n_classes=1000, bn_param=(0.1, 1e-5), dropout_rate=0.1,
             base_stage_width=None, width_mult_list=1.0, ks_list=3,
             expand_ratio_list=6, depth_list=4):
    """Once-for-all MobileNetV3 supernet constructor.

    Args:
        n_classes: number of classification classes.
        bn_param: (momentum, eps) for batch normalization.
        dropout_rate: dropout rate used by the classifier head.
        base_stage_width: stored but overridden by the hard-coded widths below.
        width_mult_list: candidate width multipliers per stage.
        ks_list: candidate convolution kernel sizes.
        expand_ratio_list: candidate channel expansion ratios.
        depth_list: candidate block-repeat (depth) counts per stage.
    """
    # int2list normalizes an int/tuple/list argument into a plain list.
    self.width_mult_list = int2list(width_mult_list, 1)
    self.ks_list = int2list(ks_list, 1)
    self.expand_ratio_list = int2list(expand_ratio_list, 1)
    self.depth_list = int2list(depth_list, 1)
    self.base_stage_width = base_stage_width
    self.width_mult_list.sort()
    self.ks_list.sort()
    self.expand_ratio_list.sort()
    self.depth_list.sort()
    base_stage_width = [16, 24, 40, 80, 112, 160, 960, 1280]
    # make_divisible rounds channel counts to multiples of 8.
    final_expand_width = [
        make_divisible(base_stage_width[-2] * max(self.width_mult_list), 8)
        for _ in self.width_mult_list
    ]
    last_channel = [
        make_divisible(base_stage_width[-1] * max(self.width_mult_list), 8)
        for _ in self.width_mult_list
    ]
    # Per-stage stride (downsampling), activation, and squeeze-excite flags.
    stride_stages = [1, 2, 2, 2, 1, 2]
    act_stages = ['relu', 'relu', 'relu', 'h_swish', 'h_swish', 'h_swish']
    se_stages = [False, False, True, False, True, True]
    # Depth configuration: every stage after the first may repeat its block.
    # NOTE: this tests the original argument, not self.depth_list, so the
    # branch only fires when the caller passes depth_list=None explicitly.
    if depth_list is None:
        n_block_list = [1, 2, 3, 4, 2, 3]
        self.depth_list = [4, 4]
        print('Use MobileNetV3 Depth Setting')
    else:
        n_block_list = [1] + [max(self.depth_list)] * 5
    # Candidate channel widths per stage, one entry per width multiplier.
    width_list = []
    for base_width in base_stage_width[:-2]:
        width = [
            make_divisible(base_width * width_mult, 8)
            for width_mult in self.width_mult_list
        ]
        width_list.append(width)
    input_channel = width_list[0]
    # first conv layer: static layer when there is a single width candidate,
    # dynamic (elastic-width) layer otherwise.
    if len(set(input_channel)) == 1:
        first_conv = ConvLayer(3, max(input_channel), kernel_size=3,
                               stride=2, act_func='h_swish')
        first_block_conv = MBInvertedConvLayer(
            in_channels=max(input_channel),
            out_channels=max(input_channel),
            kernel_size=3,
            stride=stride_stages[0],
            expand_ratio=1,
            act_func=act_stages[0],
            use_se=se_stages[0],
        )
    else:
        first_conv = DynamicConvLayer(
            in_channel_list=int2list(3, len(input_channel)),
            out_channel_list=input_channel,
            kernel_size=3,
            stride=2,
            act_func='h_swish',
        )
        first_block_conv = DynamicMBConvLayer(
            in_channel_list=input_channel,
            out_channel_list=input_channel,
            kernel_size_list=3,
            expand_ratio_list=1,
            stride=stride_stages[0],
            act_func=act_stages[0],
            use_se=se_stages[0],
        )
    first_block = MobileInvertedResidualBlock(
        first_block_conv, IdentityLayer(input_channel, input_channel))
    # inverted residual blocks
    # block_group_info records which block indices belong to each stage so
    # runtime depth can truncate stages later.
    self.block_group_info = []
    blocks = [first_block]
    _block_index = 1
    feature_dim = input_channel
    for width, n_block, s, act_func, use_se in zip(width_list[1:],
                                                   n_block_list[1:],
                                                   stride_stages[1:],
                                                   act_stages[1:],
                                                   se_stages[1:]):
        self.block_group_info.append(
            [_block_index + i for i in range(n_block)])
        _block_index += n_block
        output_channel = width
        for i in range(n_block):
            # Only the first block of a stage downsamples.
            if i == 0:
                stride = s
            else:
                stride = 1
            mobile_inverted_conv = DynamicMBConvLayer(
                in_channel_list=feature_dim,
                out_channel_list=output_channel,
                kernel_size_list=ks_list,
                expand_ratio_list=expand_ratio_list,
                stride=stride,
                act_func=act_func,
                use_se=use_se,
            )
            # Residual shortcut only when the shapes line up.
            if stride == 1 and feature_dim == output_channel:
                shortcut = IdentityLayer(feature_dim, feature_dim)
            else:
                shortcut = None
            blocks.append(
                MobileInvertedResidualBlock(mobile_inverted_conv, shortcut))
            feature_dim = output_channel
    # final expand layer, feature mix layer & classifier
    if len(final_expand_width) == 1:
        final_expand_layer = ConvLayer(max(feature_dim),
                                       max(final_expand_width),
                                       kernel_size=1,
                                       act_func='h_swish')
        feature_mix_layer = ConvLayer(
            max(final_expand_width),
            max(last_channel),
            kernel_size=1,
            bias=False,
            use_bn=False,
            act_func='h_swish',
        )
    else:
        final_expand_layer = DynamicConvLayer(
            in_channel_list=feature_dim,
            out_channel_list=final_expand_width,
            kernel_size=1,
            act_func='h_swish')
        feature_mix_layer = DynamicConvLayer(
            in_channel_list=final_expand_width,
            out_channel_list=last_channel,
            kernel_size=1,
            use_bn=False,
            act_func='h_swish',
        )
    if len(set(last_channel)) == 1:
        classifier = LinearLayer(max(last_channel), n_classes,
                                 dropout_rate=dropout_rate)
    else:
        classifier = DynamicLinearLayer(in_features_list=last_channel,
                                        out_features=n_classes,
                                        bias=True,
                                        dropout_rate=dropout_rate)
    super(OFAMobileNetV3, self).__init__(first_conv, blocks,
                                         final_expand_layer,
                                         feature_mix_layer, classifier)
    # set bn param
    self.set_bn_param(momentum=bn_param[0], eps=bn_param[1])
    # runtime_depth: current active depth (block count) of each stage.
    self.runtime_depth = [
        len(block_idx) for block_idx in self.block_group_info
    ]
def __init__(self, n_classes=1000, bn_param=(0.1, 1e-5), dropout_rate=0.1,
             base_stage_width=None, width_mult_list=1.0, ks_list=3,
             expand_ratio_list=6, depth_list=4):
    """Once-for-all MobileNetV3 supernet variant (last stage keeps stride 1).

    Args:
        n_classes: number of classification classes.
        bn_param: (momentum, eps) for batch normalization.
        dropout_rate: dropout rate used by the classifier head.
        base_stage_width: stored but overridden by the hard-coded widths below.
        width_mult_list: candidate width multipliers per stage.
        ks_list: candidate convolution kernel sizes.
        expand_ratio_list: candidate channel expansion ratios.
        depth_list: candidate block-repeat (depth) counts per stage.
    """
    # int2list normalizes an int/tuple/list argument into a plain list.
    self.width_mult_list = int2list(width_mult_list, 1)
    self.ks_list = int2list(ks_list, 1)
    self.expand_ratio_list = int2list(expand_ratio_list, 1)
    self.depth_list = int2list(depth_list, 1)
    self.base_stage_width = base_stage_width
    self.width_mult_list.sort()
    self.ks_list.sort()
    self.expand_ratio_list.sort()
    self.depth_list.sort()
    base_stage_width = [16, 24, 40, 80, 112, 160, 960, 1280]
    # make_divisible rounds channel counts to multiples of 8.
    final_expand_width = [
        make_divisible(base_stage_width[-2] * max(self.width_mult_list), 8)
        for _ in self.width_mult_list
    ]
    self.final_expand_width = final_expand_width
    last_channel = [
        make_divisible(base_stage_width[-1] * max(self.width_mult_list), 8)
        for _ in self.width_mult_list
    ]
    self.last_channel = last_channel
    # Per-stage stride: unlike the stock MobileNetV3 schedule ([...,1,2]),
    # this variant keeps the final stage at stride 1 (less downsampling).
    # stride_stages = [1, 2, 2, 2, 1, 2]
    stride_stages = [1, 2, 2, 2, 1, 1]
    act_stages = ['relu', 'relu', 'relu', 'h_swish', 'h_swish', 'h_swish']
    se_stages = [False, False, True, False, True, True]
    # NOTE: this tests the original argument, not self.depth_list, so the
    # branch only fires when the caller passes depth_list=None explicitly.
    if depth_list is None:
        n_block_list = [1, 2, 3, 4, 2, 3]
        self.depth_list = [4, 4]
        print('Use MobileNetV3 Depth Setting')
    else:
        n_block_list = [1] + [max(self.depth_list)] * 5
    # Candidate channel widths per stage, one entry per width multiplier.
    width_list = []
    for base_width in base_stage_width[:-2]:
        width = [
            make_divisible(base_width * width_mult, 8)
            for width_mult in self.width_mult_list
        ]
        width_list.append(width)
    input_channel = width_list[0]
    # first conv layer
    # if width_mult_list has only one elem
    if len(set(input_channel)) == 1:
        first_conv = ConvLayer(3, max(input_channel), kernel_size=3,
                               stride=2, act_func='h_swish')
        first_block_conv = MBInvertedConvLayer(
            in_channels=max(input_channel),
            out_channels=max(input_channel),
            kernel_size=3,
            stride=stride_stages[0],
            expand_ratio=1,
            act_func=act_stages[0],
            use_se=se_stages[0],
        )
    else:
        first_conv = DynamicConvLayer(
            in_channel_list=int2list(3, len(input_channel)),
            out_channel_list=input_channel,
            kernel_size=3,
            stride=2,
            act_func='h_swish',
        )
        first_block_conv = DynamicMBConvLayer(
            in_channel_list=input_channel,
            out_channel_list=input_channel,
            kernel_size_list=3,
            expand_ratio_list=1,
            stride=stride_stages[0],
            act_func=act_stages[0],
            use_se=se_stages[0],
        )
    first_block = MobileInvertedResidualBlock(
        first_block_conv, IdentityLayer(input_channel, input_channel))
    # inverted residual blocks
    # block_group_info records which block indices belong to each stage so
    # runtime depth can truncate stages later.
    self.block_group_info = []
    blocks = [first_block]
    _block_index = 1
    feature_dim = input_channel
    for width, n_block, s, act_func, use_se in zip(width_list[1:],
                                                   n_block_list[1:],
                                                   stride_stages[1:],
                                                   act_stages[1:],
                                                   se_stages[1:]):
        self.block_group_info.append(
            [_block_index + i for i in range(n_block)])
        _block_index += n_block
        output_channel = width
        for i in range(n_block):
            # Only the first block of a stage may downsample.
            if i == 0:
                stride = s
            else:
                stride = 1
            mobile_inverted_conv = DynamicMBConvLayer(
                in_channel_list=feature_dim,
                out_channel_list=output_channel,
                kernel_size_list=ks_list,
                expand_ratio_list=expand_ratio_list,
                stride=stride,
                act_func=act_func,
                use_se=use_se,
            )
            # Residual shortcut only when the shapes line up.
            if stride == 1 and feature_dim == output_channel:
                shortcut = IdentityLayer(feature_dim, feature_dim)
            else:
                shortcut = None
            blocks.append(
                MobileInvertedResidualBlock(mobile_inverted_conv, shortcut))
            feature_dim = output_channel
    # final expand layer, feature mix layer & classifier
    if len(final_expand_width) == 1:
        final_expand_layer = ConvLayer(max(feature_dim),
                                       max(final_expand_width),
                                       kernel_size=1,
                                       act_func='h_swish')
        feature_mix_layer = ConvLayer(
            max(final_expand_width),
            max(last_channel),
            kernel_size=1,
            bias=False,
            use_bn=False,
            act_func='h_swish',
        )
    else:
        final_expand_layer = DynamicConvLayer(
            in_channel_list=feature_dim,
            out_channel_list=final_expand_width,
            kernel_size=1,
            act_func='h_swish')
        feature_mix_layer = DynamicConvLayer(
            in_channel_list=final_expand_width,
            out_channel_list=last_channel,
            kernel_size=1,
            use_bn=False,
            act_func='h_swish',
        )
    if len(set(last_channel)) == 1:
        classifier = LinearLayer(max(last_channel), n_classes,
                                 dropout_rate=dropout_rate)
    else:
        classifier = DynamicLinearLayer(in_features_list=last_channel,
                                        out_features=n_classes,
                                        bias=True,
                                        dropout_rate=dropout_rate)
    super(OFAMobileNetV3, self).__init__(first_conv, blocks,
                                         final_expand_layer,
                                         feature_mix_layer, classifier)
    # set bn param
    self.set_bn_param(momentum=bn_param[0], eps=bn_param[1])
    # runtime_depth: current active depth (block count) of each stage.
    self.runtime_depth = [
        len(block_idx) for block_idx in self.block_group_info
    ]