def __init__(self,
                 units,
                 num_stage,
                 filter_list,
                 ratio_list,
                 num_class,
                 num_group,
                 data_type,
                 drop_out,
                 bn_mom=0.9,
                 **kwargs):
        super(se_resnext, self).__init__(**kwargs)
        num_unit = len(units)
        assert (num_unit == num_stage)

        self.conv0 = nn.Conv2D(in_channels=3,
                               channels=filter_list[0],
                               kernel_size=(7, 7),
                               strides=(2, 2),
                               padding=(3, 3),
                               use_bias=False,
                               prefix='conv0_')
        self.bn0 = nn.BatchNorm(in_channels=filter_list[0],
                                epsilon=2e-5,
                                momentum=bn_mom,
                                prefix='batchnorm0_')
        self.relu0 = nn.Activation(activation='relu', prefix='relu0_')
        self.relu0min = NReLu(prefix='relu0min_')
        self.pool0 = nn.MaxPool2D(pool_size=(3, 3),
                                  strides=(2, 2),
                                  padding=(1, 1),
                                  prefix='pool0_')

        self.residual_stages = nn.HybridSequential(prefix='residual_')
        for i in range(num_stage):
            self.residual_stages.add(
                residual_unit(in_channels=filter_list[i],
                              num_filter=filter_list[i + 1],
                              ratio=ratio_list[2],
                              strides=(1 if i == 0 else 2, 1 if i == 0 else 2),
                              dim_match=False,
                              name='stage%d_unit%d' % (i + 1, 1),
                              num_group=num_group,
                              bn_mom=bn_mom,
                              prefix='stage%d_unit%d_' % (i + 1, 1)))
            for j in range(units[i] - 1):
                self.residual_stages.add(
                    residual_unit(in_channels=filter_list[i + 1],
                                  num_filter=filter_list[i + 1],
                                  ratio=ratio_list[2],
                                  strides=(1, 1),
                                  dim_match=True,
                                  name='stage%d_unit%d' % (i + 1, j + 2),
                                  num_group=num_group,
                                  bn_mom=bn_mom,
                                  prefix='stage%d_unit%d_' % (i + 1, j + 2)))

        self.pool1 = nn.GlobalAvgPool2D(prefix='pool1_')
        self.flatten1 = nn.Flatten(prefix='flatten1_')
        self.drop1 = nn.Dropout(rate=drop_out, prefix='dp1_')

        self.fc = nn.Dense(units=num_class,
                           in_units=filter_list[-1],
                           prefix='dense_')
def vgg_stack(architecture):
    # signature and the `out` container are inferred from the call site below
    out = nn.HybridSequential()
    for (num_convs, channels) in architecture:
        out.add(vgg_block(num_convs, channels))
    return out
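

# `vgg_block` is not shown in this snippet; a plausible definition, assuming the
# classic VGG recipe of `num_convs` 3x3 convolutions followed by 2x2 max pooling:
def vgg_block(num_convs, channels):
    out = nn.HybridSequential()
    for _ in range(num_convs):
        out.add(nn.Conv2D(channels=channels, kernel_size=3,
                          padding=1, activation='relu'))
    out.add(nn.MaxPool2D(pool_size=2, strides=2))
    return out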


###############################################################
# model and params
num_outputs = 10
architecture = ((2, 64), (2, 128), (4, 256), (4, 512), (4, 512))
net = nn.HybridSequential()
# add name_scope on the outermost Sequential
# 8 conv layers + 3 dense layers = VGG 11
# 13 conv layers + 3 dense layers = VGG 16
# 16 conv layers + 3 dense layers = VGG 19 (the architecture tuple above gives 16 conv layers)
with net.name_scope():
    net.add(vgg_stack(architecture), nn.Flatten(),
            nn.Dense(4096, activation="relu"), nn.Dropout(.5),
            nn.Dense(4096, activation="relu"), nn.Dropout(.5),
            nn.Dense(num_outputs))

###############################################################

############### Graph ###############
import gluoncv
gluoncv.utils.viz.plot_network(net, shape=(64, 3, 224, 224))
#####################################

##### Optimization #####
net.collect_params().initialize(mx.init.Xavier(), ctx=ctx)

trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.01})
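
# A minimal training-step sketch for the net/trainer above (assumes `train_data`
# yields (data, label) batches and that `mx` and `gluon` are imported; illustrative only):
loss_fn = gluon.loss.SoftmaxCrossEntropyLoss()
for data, label in train_data:
    with mx.autograd.record():
        output = net(data.as_in_context(ctx))
        loss = loss_fn(output, label.as_in_context(ctx))
    loss.backward()
    trainer.step(batch_size=data.shape[0])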
Example 3
    def __init__(self,
                 channels,
                 init_block_channels,
                 init_block_kernel_size,
                 init_block_padding,
                 rs,
                 bws,
                 incs,
                 groups,
                 b_case,
                 for_training,
                 test_time_pool,
                 in_channels=3,
                 in_size=(224, 224),
                 classes=1000,
                 **kwargs):
        super(DPN, self).__init__(**kwargs)
        self.in_size = in_size
        self.classes = classes

        with self.name_scope():
            self.features = DualPathSequential(return_two=False,
                                               first_ordinals=1,
                                               last_ordinals=0,
                                               prefix="")
            self.features.add(
                DPNInitBlock(in_channels=in_channels,
                             out_channels=init_block_channels,
                             kernel_size=init_block_kernel_size,
                             padding=init_block_padding))
            in_channels = init_block_channels
            for i, channels_per_stage in enumerate(channels):
                stage = DualPathSequential(prefix="stage{}_".format(i + 1))
                r = rs[i]
                bw = bws[i]
                inc = incs[i]
                with stage.name_scope():
                    for j, out_channels in enumerate(channels_per_stage):
                        has_proj = (j == 0)
                        key_strides = 2 if (j == 0) and (i != 0) else 1
                        stage.add(
                            DPNUnit(in_channels=in_channels,
                                    mid_channels=r,
                                    bw=bw,
                                    inc=inc,
                                    groups=groups,
                                    has_proj=has_proj,
                                    key_strides=key_strides,
                                    b_case=b_case))
                        in_channels = out_channels
                self.features.add(stage)
            self.features.add(DPNFinalBlock(channels=in_channels))

            self.output = nn.HybridSequential(prefix="")
            if for_training or not test_time_pool:
                self.output.add(nn.GlobalAvgPool2D())
                self.output.add(
                    conv1x1(in_channels=in_channels,
                            out_channels=classes,
                            use_bias=True))
                self.output.add(nn.Flatten())
            else:
                self.output.add(nn.AvgPool2D(pool_size=7, strides=1))
                self.output.add(
                    conv1x1(in_channels=in_channels,
                            out_channels=classes,
                            use_bias=True))
                self.output.add(GlobalAvgMaxPool2D())
                self.output.add(nn.Flatten())
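
# `GlobalAvgMaxPool2D` is defined elsewhere in this codebase; a sketch of the usual
# formulation (the mean of global average pooling and global max pooling) — an
# assumption for illustration, not the verified original implementation:
class GlobalAvgMaxPool2D(nn.HybridBlock):
    def __init__(self, **kwargs):
        super(GlobalAvgMaxPool2D, self).__init__(**kwargs)
        with self.name_scope():
            self.avg_pool = nn.GlobalAvgPool2D()
            self.max_pool = nn.GlobalMaxPool2D()

    def hybrid_forward(self, F, x):
        # average the two global pooling results channel-wise
        return 0.5 * (self.avg_pool(x) + self.max_pool(x))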
    def __init__(self,
                 block,
                 layers,
                 classes=1000,
                 dilated=False,
                 norm_layer=BatchNorm,
                 last_gamma=False,
                 **kwargs):
        self.inplanes = 64
        super(ResNetV1b, self).__init__()
        with self.name_scope():
            self.conv1 = nn.Conv2D(in_channels=3,
                                   channels=64,
                                   kernel_size=7,
                                   strides=2,
                                   padding=3,
                                   use_bias=False)
            self.bn1 = norm_layer(in_channels=64)
            self.relu = nn.Activation('relu')
            self.maxpool = nn.MaxPool2D(pool_size=3, strides=2, padding=1)
            self.layer1 = self._make_layer(1,
                                           block,
                                           64,
                                           layers[0],
                                           norm_layer=norm_layer,
                                           last_gamma=last_gamma)
            self.layer2 = self._make_layer(2,
                                           block,
                                           128,
                                           layers[1],
                                           strides=2,
                                           norm_layer=norm_layer,
                                           last_gamma=last_gamma)
            if dilated:
                self.layer3 = self._make_layer(3,
                                               block,
                                               256,
                                               layers[2],
                                               strides=1,
                                               dilation=2,
                                               norm_layer=norm_layer,
                                               last_gamma=last_gamma)
                self.layer4 = self._make_layer(4,
                                               block,
                                               512,
                                               layers[3],
                                               strides=1,
                                               dilation=4,
                                               norm_layer=norm_layer,
                                               last_gamma=last_gamma)
            else:
                self.layer3 = self._make_layer(3,
                                               block,
                                               256,
                                               layers[2],
                                               strides=2,
                                               norm_layer=norm_layer,
                                               last_gamma=last_gamma)
                self.layer4 = self._make_layer(4,
                                               block,
                                               512,
                                               layers[3],
                                               strides=2,
                                               norm_layer=norm_layer,
                                               last_gamma=last_gamma)
            self.avgpool = nn.AvgPool2D(7)
            self.flat = nn.Flatten()
            self.fc = nn.Dense(in_units=512 * block.expansion, units=classes)
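
    # The forward pass is not part of this excerpt; a plausible hybrid_forward
    # matching the layers declared above (an illustrative assumption):
    def hybrid_forward(self, F, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.avgpool(x)
        x = self.flat(x)
        x = self.fc(x)
        return x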
Example 5
from mxnet import nd
from mxnet import gluon
from mxnet.gluon import nn
from mxnet.gluon.data.vision import datasets, transforms
import matplotlib.pyplot as plt

net = nn.Sequential()
net.add(nn.Conv2D(channels=6, kernel_size=5, activation='relu'),
        nn.MaxPool2D(pool_size=2, strides=2),
        nn.Conv2D(channels=16, kernel_size=3, activation='relu'),
        nn.MaxPool2D(pool_size=2, strides=2), nn.Flatten(),
        nn.Dense(120, activation="relu"), nn.Dense(84, activation="relu"),
        nn.Dense(10))

net.load_parameters('net.params')

transformer = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize(0.13, 0.31)])

mnist_valid = datasets.FashionMNIST(train=False)
X, y = mnist_valid[:6]
preds = []
for x in X:
    x = transformer(x).expand_dims(axis=0)
    pred = net(x).argmax(axis=1)
    preds.append(pred.astype('int32').asscalar())

_, figs = plt.subplots(1, 6, figsize=(15, 15))
text_labels = [
    't-shirt', 'trouser', 'pullover', 'dress', 'coat', 'sandal', 'shirt',
    'sneaker', 'bag', 'ankle boot'
]
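
# The snippet is truncated here; a plausible continuation that draws the six
# validation images with predicted and true labels (standard matplotlib usage):
for f, x, pred, label in zip(figs, X, preds, y):
    f.imshow(x.reshape((28, 28)).asnumpy())
    f.set_title(text_labels[pred] + '\n' + text_labels[int(label)])
    f.axes.get_xaxis().set_visible(False)
    f.axes.get_yaxis().set_visible(False)
plt.show()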
Example 6
def main(args):
    if args.gpu == -1:
        ctx = mx.cpu()
    else:
        ctx = mx.gpu(args.gpu)

    with ctx:

        batch_size = args.batch_size

        if ((args.dataset == 'mnist') or (args.dataset == 'fmnist')):
            num_inputs = 28 * 28
            num_outputs = 10
        elif args.dataset == 'chmnist':
            num_inputs = 64 * 64
            num_outputs = 8
        elif args.dataset == 'bcw':
            num_inputs = 30
            num_outputs = 2
        elif args.dataset == 'cifar10':
            num_inputs = 32 * 32 * 3
            num_outputs = 10
        else:
            sys.exit('Not Implemented Dataset!')

        #################################################
        # Multiclass Logistic Regression
        MLR = gluon.nn.Sequential()
        with MLR.name_scope():
            MLR.add(gluon.nn.Dense(num_outputs))

        ########################################################################################################################
        def evaluate_accuracy(data_iterator, net):

            acc = mx.metric.Accuracy()
            for i, (data, label) in enumerate(data_iterator):
                if args.net == 'mlr':
                    data = data.as_in_context(ctx).reshape((-1, num_inputs))
                    label = label.as_in_context(ctx)
                elif args.net == 'dnn10' and (args.dataset == 'mnist'
                                              or args.dataset == 'fmnist'):
                    data = data.as_in_context(ctx).reshape((-1, 1, 28, 28))
                    label = label.as_in_context(ctx)
                elif args.dataset == 'chmnist':
                    data = data.as_in_context(ctx).reshape((-1, 1, 64, 64))
                    label = label.as_in_context(ctx)
                elif args.net == 'dnn2':
                    data = data.as_in_context(ctx).reshape(
                        (-1, 1, 1, num_inputs))
                    label = label.as_in_context(ctx)
                elif args.dataset == 'cifar10':
                    data = data.as_in_context(ctx).reshape((-1, 3, 32, 32))
                    label = label.as_in_context(ctx)
                output = net(data)
                predictions = nd.argmax(output, axis=1)
                if args.dataset == 'chmnist':
                    predictions = predictions.reshape(-1, 1)
                acc.update(preds=predictions, labels=label)
            return acc.get()[1]

        ########################################################################################################################
        # decide attack type
        if args.byz_type == 'partial_trim':
            # partial knowledge trim attack
            byz = byzantine.partial_trim
        elif args.byz_type == 'full_trim':
            # full knowledge trim attack
            byz = byzantine.full_trim
        elif args.byz_type == 'full_krum':
            byz = byzantine.full_krum
        elif args.byz_type == 'no':
            byz = byzantine.no_byz
        else:
            sys.exit('Not Implemented Attack!')

        # decide model architecture
        if args.net == 'mlr':
            net = MLR
            net.collect_params().initialize(mx.init.Xavier(magnitude=1.),
                                            force_reinit=True,
                                            ctx=ctx)
        elif args.net == 'dnn10':
            net = nn.Sequential()
            net.add(nn.Conv2D(channels=30, kernel_size=3, activation='relu'),
                    nn.MaxPool2D(pool_size=2, strides=2),
                    nn.Conv2D(channels=50, kernel_size=3, activation='relu'),
                    nn.MaxPool2D(pool_size=2, strides=2), nn.Flatten(),
                    nn.Dense(200, activation='relu'), nn.Dense(10))
            net.collect_params().initialize(mx.init.Xavier(magnitude=1.),
                                            force_reinit=True,
                                            ctx=ctx)
        elif args.net == 'dnn2':
            net = nn.Sequential()
            net.add(nn.Conv2D(channels=30, kernel_size=1, activation='relu'),
                    nn.MaxPool2D(pool_size=1, strides=1),
                    nn.Conv2D(channels=50, kernel_size=1, activation='relu'),
                    nn.MaxPool2D(pool_size=1, strides=1), nn.Flatten(),
                    nn.Dense(200, activation='relu'), nn.Dense(2))
            net.initialize(init=init.Xavier(), ctx=ctx)
            #net.collect_params().initialize(mx.init.Xavier(magnitude=1.), force_reinit=True, ctx=ctx)
        elif args.net == 'resnet20':
            net = get_model('cifar_resnet20_v1',
                            pretrained=False,
                            classes=8,
                            ctx=ctx)
            net.collect_params().initialize(mx.init.Xavier(), ctx=ctx)

        else:
            sys.exit('Not Implemented model architecture!')

        # define loss
        softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

        # set up parameters
        num_workers = args.nworkers
        lr = args.lr
        epochs = args.nepochs
        cmax = args.cmax
        dec = args.decay
        grad_list = []
        train_acc_list = []

        # generate a string indicating the parameters
        paraString = str(args.byz_type) + "_" + str(
            args.aggregation) + "_" + str(args.dataset) + "_" + str(
                args.net) + "_lr_" + str(args.lr) + "_bias_" + str(
                    args.bias) + "_m_" + str(args.nworkers) + "_c_" + str(
                        args.nbyz) + "_cmax_" + str(args.cmax) + "_d_" + str(
                            args.decay) + "_batch_" + str(
                                args.batch_size) + "_epochs_" + str(
                                    args.nepochs) + "_"

        # set up seed
        seed = args.seed
        mx.random.seed(seed)
        random.seed(seed)
        np.random.seed(seed)

        # load dataset
        if (args.dataset == 'mnist'):

            def transform(data, label):
                return data.astype(np.float32) / 255, label.astype(np.float32)

            test_data = mx.gluon.data.DataLoader(
                mx.gluon.data.vision.datasets.MNIST(train=False,
                                                    transform=transform),
                500,
                shuffle=False,
                last_batch='rollover')
            train_data = mx.gluon.data.DataLoader(
                mx.gluon.data.vision.datasets.MNIST(train=True,
                                                    transform=transform),
                60000,
                shuffle=True,
                last_batch='rollover')

        elif (args.dataset == 'cifar10'):
            transform_train = transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize([0.4914, 0.4822, 0.4465],
                                     [0.2023, 0.1994, 0.2010])
            ])
            transform_test = transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize([0.4914, 0.4822, 0.4465],
                                     [0.2023, 0.1994, 0.2010])
            ])
            test_data = gluon.data.DataLoader(gluon.data.vision.CIFAR10(
                train=False).transform_first(transform_test),
                                              batch_size=32,
                                              shuffle=False)
            train_data = gluon.data.DataLoader(gluon.data.vision.CIFAR10(
                train=True).transform_first(transform_train),
                                               batch_size=32,
                                               shuffle=True,
                                               last_batch='discard')

        elif (args.dataset == 'fmnist'):

            def transform(data, label):
                return data.astype(np.float32) / 255, label.astype(np.float32)

            test_data = mx.gluon.data.DataLoader(
                mx.gluon.data.vision.datasets.FashionMNIST(
                    train=False, transform=transform),
                500,
                shuffle=False,
                last_batch='rollover')
            train_data = mx.gluon.data.DataLoader(
                mx.gluon.data.vision.datasets.FashionMNIST(
                    train=True, transform=transform),
                60000,
                shuffle=True,
                last_batch='rollover')

        elif (args.dataset == 'chmnist'):
            chdata = genfromtxt('chmnist64_shuffled.csv', delimiter=',')
            train_data_ = chdata[1:4001]
            test_data_ = chdata[4001:]
            train_data = mx.gluon.data.DataLoader(
                mx.gluon.data.dataset.ArrayDataset(
                    train_data_[:, 1:-1].astype(np.float32) / 255,
                    train_data_[:, -1:].astype(np.float32) - 1),
                4000,
                shuffle=False,
                last_batch='rollover')
            test_data = mx.gluon.data.DataLoader(
                mx.gluon.data.dataset.ArrayDataset(
                    test_data_[:, 1:-1].astype(np.float32) / 255,
                    test_data_[:, -1:].astype(np.float32) - 1),
                1000,
                shuffle=True,
                last_batch='rollover')

        elif (args.dataset == 'bcw'):
            data = load_breast_cancer()
            df = pd.DataFrame(data.data, columns=data.feature_names)
            y = data.target
            df = (df - df.mean()) / (df.max() - df.min())
            X_train, X_test, y_train, y_test = train_test_split(
                df, y, test_size=0.20, random_state=69)
            train_data = mx.gluon.data.DataLoader(
                mx.gluon.data.dataset.ArrayDataset(
                    X_train.values.astype(np.float32),
                    y_train.astype(np.float32)),
                455,
                shuffle=False,
                last_batch='rollover')
            test_data = mx.gluon.data.DataLoader(
                mx.gluon.data.dataset.ArrayDataset(
                    X_test.values.astype(np.float32),
                    y_test.astype(np.float32)),
                114,
                shuffle=True,
                last_batch='rollover')
        else:
            sys.exit('Not Implemented dataset!')

        # biased assignment
        bias_weight = args.bias
        other_group_size = (1 - bias_weight) / (num_outputs - 1)
        worker_per_group = num_workers / (num_outputs)
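        # e.g. with bias=0.5 and 10 classes: a sample of label y stays in group y
        # with probability 0.5 and falls into each of the other 9 groups with
        # probability 0.5 / 9, about 0.056 (illustrative numbers)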

        # assign non-IID training data to each worker
        each_worker_data = [[] for _ in range(num_workers)]
        each_worker_label = [[] for _ in range(num_workers)]

        counter = 0
        server_data = mx.nd.empty((100, 1, 28, 28))
        server_label = mx.nd.empty(100)
        for _, (data, label) in enumerate(train_data):
            for (x, y) in zip(data, label):
                if (args.dataset == 'mnist'
                        or args.dataset == 'fmnist') and args.net == 'mlr':
                    x = x.as_in_context(ctx).reshape(-1, num_inputs)
                if (args.dataset == 'mnist'
                        or args.dataset == 'fmnist') and args.net == 'dnn10':
                    x = x.as_in_context(ctx).reshape(-1, 1, 28, 28)
                if args.dataset == 'chmnist':
                    x = x.as_in_context(ctx).reshape(-1, 1, 64, 64)
                if args.dataset == 'bcw':
                    x = x.as_in_context(ctx).reshape(-1, 1, 1, 30)
                if args.dataset == 'cifar10':
                    x = x.as_in_context(ctx).reshape(-1, 3, 32, 32)
                y = y.as_in_context(ctx)

                # assign a data point to a group
                upper_bound = (y.asnumpy()) * (1 - bias_weight) / (
                    num_outputs - 1) + bias_weight
                lower_bound = (y.asnumpy()) * (1 -
                                               bias_weight) / (num_outputs - 1)
                rd = np.random.random_sample()

                if rd > upper_bound:
                    worker_group = int(
                        np.floor((rd - upper_bound) / other_group_size) +
                        y.asnumpy() + 1)
                elif rd < lower_bound:
                    worker_group = int(np.floor(rd / other_group_size))
                else:
                    worker_group = y.asnumpy()

                # assign a data point to a worker
                rd = np.random.random_sample()
                selected_worker = int(worker_group * worker_per_group +
                                      int(np.floor(rd * worker_per_group)))
                each_worker_data[selected_worker].append(x)
                each_worker_label[selected_worker].append(y)

                if (args.aggregation == 'fltrust'):
                    if (counter < 100):
                        server_data[counter] = x.reshape((1, 28, 28))
                        server_label[counter] = y
                        counter += 1  # without this increment only slot 0 would ever be filled
        # concatenate the data for each worker
        each_worker_data = [
            nd.concat(*each_worker, dim=0) for each_worker in each_worker_data
        ]
        each_worker_label = [
            nd.concat(*each_worker, dim=0) for each_worker in each_worker_label
        ]
        #pdb.set_trace()
        # random shuffle the workers
        random_order = np.random.RandomState(
            seed=seed).permutation(num_workers)
        each_worker_data = [each_worker_data[i] for i in random_order]
        each_worker_label = [each_worker_label[i] for i in random_order]
        P = 0
        if (args.net == 'mlr'
                and (args.dataset == 'mnist' or args.dataset == 'fmnist')):
            shape = (1, 784)
        elif (args.net == 'dnn10'
              and (args.dataset == 'mnist' or args.dataset == 'fmnist')):
            shape = (1, 1, 28, 28)
        elif (args.dataset == 'chmnist'):
            shape = (1, 1, 64, 64)
        elif (args.dataset == 'bcw'):
            shape = (1, 1, 1, 30)
        elif (args.dataset == 'cifar10'):
            shape = (1, 3, 32, 32)

        dummy_output = net(mx.nd.zeros(shape))
        # count the total number of parameters in the network
        for param in net.collect_params().values():
            if param.grad_req != 'null':
                P = P + len(param.grad().reshape(-1))
        #pdb.set_trace()
        if (args.aggregation
                == 'EULtrim') or (args.aggregation
                                  == 'EULkrum') or (args.aggregation
                                                    == 'EULmedian'):
            if args.dataset == 'mnist':
                valid_dataset = mx.gluon.data.vision.datasets.MNIST(
                    train=False, transform=transform)
            if args.dataset == 'fmnist':
                valid_dataset = mx.gluon.data.vision.datasets.FashionMNIST(
                    train=False, transform=transform)
            sampled = np.random.choice(10000, 100)
            valid_array = mx.gluon.data.dataset.ArrayDataset(
                valid_dataset[sampled[:]][0], valid_dataset[sampled[:]][1])
            valid_data = mx.gluon.data.DataLoader(valid_array,
                                                  100,
                                                  shuffle=True)

            del valid_dataset
            del valid_array
        direction = mx.nd.zeros(P)  #current direction of the global model
        flip_vector = np.empty(epochs)  #flipscore of the global model
        local_flip_vector = np.zeros(
            (epochs, num_workers))  #flipscore of all local models
        local_flip_new = np.zeros((epochs, num_workers))
        active = np.arange(
            num_workers
        )  #used in the earlier version to know which clients aren't blacklisted yet
        blacklist = np.zeros(num_workers)
        susp = nd.zeros(num_workers)  #suspicion score of all clients
        test_acc = np.empty(epochs)
        corrected = epochs  # epoch in which all cmax suspected clients were removed
        flag_corrected = 1
        max_flip = 1.0  #used for the whitebox adaptive attack
        client_list = np.ones(
            (epochs, num_workers)) * (-1)  #clients chosen by EUL/FABA etc
        # begin training
        for e in range(epochs):
            #print (lr)
            #if (e == 200): lr = lr/2
            #if (e == 400): lr = lr/2
            if (args.aggregation == 'fltrust'):
                with autograd.record():
                    output = net(server_data)
                    loss = softmax_cross_entropy(output, server_label)
                loss.backward()
                server_params = [
                    param.grad().copy()
                    for param in net.collect_params().values()
                    if param.grad_req != 'null'
                ]
            for i in range(num_workers):
                if (blacklist[i] == 0):
                    # sample a batch
                    minibatch = np.random.choice(list(
                        range(each_worker_data[i].shape[0])),
                                                 size=batch_size,
                                                 replace=False)
                    # forward
                    with autograd.record():
                        output = net(each_worker_data[i][minibatch])
                        loss = softmax_cross_entropy(
                            output, each_worker_label[i][minibatch])
                        # backward
                    loss.backward()
                    grad_list.append([
                        param.grad().copy()
                        for param in net.collect_params().values()
                        if param.grad_req != 'null'
                    ])

            if cmax > 0:
                flag_corrected = 1
            susp = susp / dec
            #lr = get_lr(args.lr, e, epochs)
            if args.aggregation == 'trim1':
                # we aggregate the gradients instead of local model weights in this demo because for the
                # aggregation rules in our setting, it is equivalent to aggregate either of them
                _, direction, cmax, flip_count, lfs = nd_aggregation.trim1(
                    e, grad_list, net, lr, byz, direction, active, blacklist,
                    susp, args.nbyz, cmax, args.utrg, args.udet, args.urem)

                flip_vector[e] = flip_count
                local_flip_vector[e] = lfs.asnumpy()

            if args.aggregation == 'trim':
                # we aggregate the gradients instead of local model weights in this demo because for the
                # aggregation rules in our setting, it is equivalent to aggregate either of them
                _, direction, cmax, flip_count, lfs, lfs_new = nd_aggregation.trim(
                    e, grad_list, net, lr, byz, direction, active, blacklist,
                    susp, args.nbyz, cmax, args.utrg, args.udet, args.urem)

                flip_vector[e] = flip_count
                local_flip_vector[e] = lfs.asnumpy()
                local_flip_new[e] = lfs_new.asnumpy()

            elif args.aggregation == 'fltrust':
                nd_aggregation.fltrust(e, server_params, grad_list, net, lr,
                                       byz, args.nbyz, active)

            elif args.aggregation == 'foolsgold':
                nd_aggregation.foolsgold(e, grad_list, net, lr, byz, args.nbyz,
                                         active)

            elif args.aggregation == 'krum':
                _, direction, cmax, flip_count, lfs, max_flip = nd_aggregation.krum(
                    e, grad_list, net, lr, byz, direction, active, blacklist,
                    susp, args.nbyz, cmax, args.utrg, args.udet, args.urem,
                    max_flip)
                flip_vector[e] = flip_count
                local_flip_vector[e] = lfs.asnumpy()

            elif args.aggregation == 'median':
                _, direction, cmax, flip_count, lfs = nd_aggregation.median(
                    e, grad_list, net, lr, byz, direction, active, blacklist,
                    susp, args.nbyz, cmax, args.utrg, args.udet, args.urem)
                flip_vector[e] = flip_count
                local_flip_vector[e] = lfs.asnumpy()

            elif args.aggregation == 'bulyan':
                _, bul_list = nd_aggregation.bulyan(e, grad_list, net, lr, byz,
                                                    args.nbyz)
                client_list[e] = bul_list

            elif args.aggregation == 'faba':
                faba_list = nd_aggregation.faba(e, grad_list, net, lr, byz,
                                                args.nbyz)
                client_list[e] = faba_list

            elif args.aggregation == 'EULtrim':
                _, eul_list = nd_aggregation.EULtrim(e, grad_list, net, lr,
                                                     byz, valid_data, args.net,
                                                     args.nbyz, args.gpu)
                client_list[e] = eul_list

            elif args.aggregation == 'EULkrum':
                _, eul_list = nd_aggregation.EULkrum(e, grad_list, net, lr,
                                                     byz, valid_data, args.net,
                                                     args.nbyz, args.gpu)
                client_list[e] = eul_list

            elif args.aggregation == 'EULmedian':
                _, eul_list = nd_aggregation.EULmedian(e, grad_list, net, lr,
                                                       byz, valid_data,
                                                       args.net, args.nbyz,
                                                       args.gpu)
                client_list[e] = eul_list

            else:
                sys.exit('Not Implemented aggregation!')

            if (cmax == 0 and flag_corrected == 1):
                corrected = e
                flag_corrected = 0
            # free memory
            del grad_list
            # reset the list
            grad_list = []

            # compute training accuracy every 10 iterations
            '''if (e+1) % 1 == 0:
                pdb.set_trace()
                train_accuracy = evaluate_accuracy(train_data, net)
                train_acc_list.append(train_accuracy)
                print("Epoch %02d. Train_acc %0.4f" % (e, train_accuracy))
            

            # save the training accuracy every 100 iterations
            if (e+1) % 1 == 0:
                if (args.dataset == 'mnist' and args.net == 'mlr'):
                    if not os.path.exists('out_mnist_mlr/'):
                        os.mkdir('out_mnist_mlr/')
                    np.savetxt('out_mnist_mlr/' + paraString, train_acc_list, fmt='%.4f')
                elif (args.dataset == 'mnist' and args.net == 'cnn'):
                    if not os.path.exists('out_mnist_cnn/'):
                        os.mkdir('out_mnist_cnn/')
                    np.savetxt('out_mnist_cnn/' + paraString, train_acc_list, fmt='%.4f')
            '''
            # compute the final testing accuracy
            #if (e+1) == args.nepochs:
            test_accuracy = evaluate_accuracy(test_data, net)
            test_acc[e] = test_accuracy
            print("Epoch %02d. Test_acc %0.4f" % (e, test_accuracy))

        filename = args.filename
        myString = args.aggregation + '_' + args.byz_type + '_' + args.net + '_' + args.dataset + '_' + str(
            args.utrg) + '_' + str(args.udet) + '_' + str(
                args.urem) + '_' + filename + '_'
        if not os.path.exists('Outputs/'):
            os.mkdir('Outputs/')
        #np.save('Outputs/'+paraString+'Flip_old.npy', local_flip_new )
        np.save('Outputs/' + paraString + 'Test_acc.npy', test_acc)
        #np.save('Outputs/'+paraString+'FLip_local_old.npy', local_flip_vector)
        #np.save('Outputs/'+paraString+'Reputation_old.npy', susp.asnumpy())
        net.save_parameters('Outputs/' + paraString + 'net.params')
        ones = nd.ones(num_workers)
        zeros = nd.zeros(num_workers)
        tp = 0
        fp = 0
        tn = 0
        fn = 0
        if (args.aggregation == 'krum' or args.aggregation == 'trim'
                or args.aggregation == 'median'):
            for i in range(epochs):
                sflip = np.argsort(local_flip_vector[i])
                c_removed = len(np.where(local_flip_vector[i] == 0)[0])
                cmax_then = args.cmax - c_removed
                if (cmax_then > 0):
                    tp = tp + len(np.where(sflip[-cmax_then:] < args.nbyz)[0])
                    fp = fp + len(np.where(sflip[-cmax_then:] >= args.nbyz)[0])
                    tn = tn + len(
                        np.where(sflip[c_removed:-cmax_then] >= args.nbyz)[0])
                    fn = fn + len(
                        np.where(sflip[c_removed:-cmax_then] < args.nbyz)[0])
                else:
                    tn = tn + len(np.where(sflip[c_removed:] >= args.nbyz)[0])
                    fn = fn + len(np.where(sflip[c_removed:] < args.nbyz)[0])

        if (args.aggregation == 'bulyan' or args.aggregation == 'faba'
                or args.aggregation == 'EULtrim'
                or args.aggregation == 'EULkrum'
                or args.aggregation == 'EULmedian'):
            for i in range(epochs):
                positives = len(np.where(client_list[i] == -1)[0])
                negatives = num_workers - positives
                tn = tn + len(
                    np.where(client_list[i, :negatives] >= args.nbyz)[0])
                fn = fn + len(
                    np.where(client_list[i, :negatives] < args.nbyz)[0])
                tp = tp + args.nbyz - len(
                    np.where(client_list[i, :negatives] < args.nbyz)[0])
                fp = fp + num_workers - args.nbyz - len(
                    np.where(client_list[i, :negatives] >= args.nbyz)[0])

        print(tp, fp, tn, fn, corrected)
Example 7
#coding:utf-8

import gluonbook as gb
from mxnet import gluon, init
from mxnet.gluon import loss as gloss, nn

drop_prob1 = 0.2
drop_prob2 = 0.5

net = nn.Sequential()
net.add(nn.Flatten())
net.add(nn.Dense(256, activation='relu'))

net.add(nn.Dropout(drop_prob1))
net.add(nn.Dense(256, activation='relu'))
net.add(nn.Dropout(drop_prob2))
net.add(nn.Dense(10))
net.initialize(init.Normal(sigma=0.01))

num_epoch = 40
batch_size = 256
train_iter, test_iter = gb.load_data_fashion_mnist(batch_size)
loss = gloss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.5})
gb.train_ch3(net, train_iter, test_iter, loss, num_epoch, batch_size, None,
             None, trainer)
Example 8
    def __init__(self, depth, ctx, pretrained=True, num_features=0, num_classes=0):
        super(ResNet, self).__init__()
        self.pretrained = pretrained
        self.num_classes = num_classes

        with self.name_scope():
            model1 = ResNet.__factory[depth](pretrained=pretrained, ctx=ctx).features[:-1]
            model1[-1][0].body[0]._kwargs['stride'] = (1, 1)
            model1[-1][0].downsample[0]._kwargs['stride'] = (1, 1)

            model2 = ResNet.__factory[depth](pretrained=pretrained, ctx=ctx).features[:-1]
            model2[-1][0].body[0]._kwargs['stride'] = (1, 1)
            model2[-1][0].downsample[0]._kwargs['stride'] = (1, 1)

            model3 = ResNet.__factory[depth](pretrained=pretrained, ctx=ctx).features[:-1]
            model3[-1][0].body[0]._kwargs['stride'] = (1, 1)
            model3[-1][0].downsample[0]._kwargs['stride'] = (1, 1)

            #backbone
            self.base = nn.HybridSequential()
            for m in model1[:-2]:
                self.base.add(m)
            self.base.add(model1[-2][0])

            #branch 1
            self.branch1 = nn.HybridSequential()
            for m in model1[-2][1:]:
                self.branch1.add(m)
            for m in model1[-1]:
                self.branch1.add(m)

            #branch 2
            self.branch2 = nn.HybridSequential()
            for m in model2[-2][1:]:
                self.branch2.add(m)
            for m in model2[-1]:
                self.branch2.add(m)

            #branch 3
            self.branch3 = nn.HybridSequential()
            for m in model3[-2][1:]:
                self.branch3.add(m)
            for m in model3[-1]:
                self.branch3.add(m)
            

            #local
            self.feat = nn.HybridSequential()
            self.classify = nn.HybridSequential()
            for _ in range(5):
                tmp = nn.HybridSequential()
                tmp.add(nn.GlobalMaxPool2D())
                feat = nn.Conv2D(channels=num_features, kernel_size=1, use_bias=False)
                feat.initialize(init=init.MSRAPrelu('in', 0), ctx=ctx)
                tmp.add(feat)
                bn = nn.BatchNorm()
                bn.initialize(init=init.Zero(), ctx=ctx)
                tmp.add(bn)
                tmp.add(nn.Flatten())
                self.feat.add(tmp)

                classifier = nn.Dense(num_classes, use_bias=False)
                classifier.weight.initialize(init=init.Normal(0.001), ctx=ctx)
                self.classify.add(classifier)

            #global
            self.g_feat = nn.HybridSequential()
            self.g_classify = nn.HybridSequential()
            for _ in range(3):
                tmp = nn.HybridSequential()
                tmp.add(nn.GlobalAvgPool2D())
                feat = nn.Conv2D(channels=num_features, kernel_size=1, use_bias=False)
                feat.initialize(init=init.MSRAPrelu('in', 0), ctx=ctx)
                tmp.add(feat)
                bn = nn.BatchNorm(center=False, scale=True)
                bn.initialize(init=init.Zero(), ctx=ctx)
                tmp.add(bn)
                tmp.add(nn.Flatten())
                self.g_feat.add(tmp)

                classifier = nn.Dense(num_classes, use_bias=False)
                classifier.initialize(init=init.Normal(0.001), ctx=ctx)
                self.g_classify.add(classifier)
    def __init__(self,
                 units,
                 num_stage,
                 filter_list,
                 ratio_list,
                 num_class,
                 num_group,
                 data_type,
                 drop_out,
                 bn_mom=0.9,
                 **kwargs):
        super(resnext, self).__init__(**kwargs)
        num_unit = len(units)
        assert (num_unit == num_stage)
        self.num_class = num_class

        self.bnfirst = nn.BatchNorm(in_channels=3,
                                    epsilon=2e-5,
                                    momentum=bn_mom,
                                    prefix='batchnormfirst_')

        # fw
        self.conv0 = nn.Conv2D(in_channels=3,
                               channels=filter_list[0],
                               kernel_size=(7, 7),
                               strides=(2, 2),
                               padding=(3, 3),
                               use_bias=False,
                               prefix='conv0_')
        self.bn0 = nn.BatchNorm(in_channels=filter_list[0],
                                epsilon=2e-5,
                                momentum=bn_mom,
                                prefix='batchnorm0_')
        self.bias0 = BiasAdder(channels=filter_list[0], prefix='bias0_')
        self.relu0 = nn.Activation(activation='relu', prefix='relu0_')
        self.relu0min = NReLu(prefix='relu0min_')
        self.pool0 = nn.MaxPool2D(pool_size=(2, 2),
                                  strides=(2, 2),
                                  padding=(0, 0),
                                  prefix='pool0_')

        # td
        self.upsample0 = UpsampleLayer(size=2, scale=1., prefix='up0_')
        self.bntd0 = nn.BatchNorm(in_channels=filter_list[0],
                                  epsilon=2e-5,
                                  momentum=bn_mom,
                                  prefix='td_batchnorm0_')
        self.bntd0min = nn.BatchNorm(in_channels=filter_list[0],
                                     epsilon=2e-5,
                                     momentum=bn_mom,
                                     prefix='td_batchnorm0min_')
        self.tdconv0 = nn.Conv2DTranspose(channels=3,
                                          in_channels=filter_list[0],
                                          kernel_size=(7, 7),
                                          strides=(2, 2),
                                          padding=(3, 3),
                                          output_padding=1,
                                          use_bias=False,
                                          params=self.conv0.params,
                                          prefix='td_conv0_')
        self.bntdfinal = nn.BatchNorm(in_channels=3,
                                      epsilon=2e-5,
                                      momentum=bn_mom,
                                      prefix='td_batchnormfinal_')
        self.bntdfinalmin = nn.BatchNorm(in_channels=3,
                                         epsilon=2e-5,
                                         momentum=bn_mom,
                                         prefix='td_batchnormfinalmin_')

        self.residual_stages = nn.HybridSequential(prefix='residual_')
        topdown_list = []
        for i in range(num_stage):
            self.residual_stages.add(
                residual_unit(in_channels=filter_list[i],
                              num_filter=filter_list[i + 1],
                              ratio=ratio_list[2],
                              strides=(1 if i == 0 else 2, 1 if i == 0 else 2),
                              dim_match=False,
                              name='stage%d_unit%d' % (i + 1, 1),
                              num_group=num_group,
                              bn_mom=bn_mom,
                              prefix='stage%d_unit%d_' % (i + 1, 1)))
            topdown_list.append(
                topdown_residual_unit(fwblock=self.residual_stages[-1],
                                      name='stage%d_td_unit%d' % (i + 1, 1),
                                      prefix='stage%d_td_unit%d_' %
                                      (i + 1, 1)))
            for j in range(units[i] - 1):
                self.residual_stages.add(
                    residual_unit(in_channels=filter_list[i + 1],
                                  num_filter=filter_list[i + 1],
                                  ratio=ratio_list[2],
                                  strides=(1, 1),
                                  dim_match=True,
                                  name='stage%d_unit%d' % (i + 1, j + 2),
                                  num_group=num_group,
                                  bn_mom=bn_mom,
                                  prefix='stage%d_unit%d_' % (i + 1, j + 2)))
                topdown_list.append(
                    topdown_residual_unit(
                        fwblock=self.residual_stages[-1],
                        name='stage%d_td_unit%d' % (i + 1, j + 2),
                        prefix='stage%d_td_unit%d_' % (i + 1, j + 2)))

        with self.name_scope():
            self.topdown_stages = nn.HybridSequential(prefix='td_residual_')
            for block in topdown_list[::-1]:
                self.topdown_stages.add(block)

        # fw classifier
        self.pool1 = nn.GlobalAvgPool2D(prefix='pool1_')
        self.drop1 = nn.Dropout(rate=drop_out, prefix='dp1_')
        self.fc = nn.Conv2D(in_channels=filter_list[-1],
                            channels=num_class,
                            kernel_size=(1, 1),
                            use_bias=True,
                            prefix='dense_')
        self.flatten1 = nn.Flatten(prefix='flatten1_')

        # bw classifier
        self.reshape = Reshape(shape=(num_class, 1, 1), prefix='reshape_')
        self.td_drop1 = nn.Dropout(rate=drop_out, prefix='td_dp1_')
        self.td_fc = nn.Conv2DTranspose(channels=filter_list[-1],
                                        in_channels=num_class,
                                        kernel_size=(1, 1),
                                        strides=(1, 1),
                                        use_bias=False,
                                        params=self.fc.params,
                                        prefix='td_dense_')
        self.upsample1 = UpsampleLayer(size=4,
                                       scale=1. / (4**2),
                                       prefix='up1_')
Example 10
    def __init__(self, classes=4, dropout_keep_prob=0.5, **kwargs):
        """400 classes in the Kinetics dataset."""
        super(InceptionI3d, self).__init__(**kwargs)
        self._num_classes = classes
        self.dropout_keep_prob = dropout_keep_prob

        # this is the main classifier
        with self.name_scope():
            self.features = nn.HybridSequential(prefix='')

            # the input shape is `batch_size` x `num_frames` x 224 x 224 x `num_channels` in tf code
            # but gluon is NCDHW
            # input shape is 1, 3, 79, 224, 224
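            # e.g. a TF-style NDHWC batch could be rearranged for this network with
            # something like x.transpose((0, 4, 1, 2, 3))  (illustrative, not part of the model)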

            self.features.add(
                _make_unit3d(channels=64,
                             kernel_size=(7, 7, 7),
                             strides=(2, 2, 2)))
            # shape is (1, 64, 37, 109, 109)

            self.features.add(
                nn.MaxPool3D(pool_size=(1, 3, 3),
                             strides=(1, 2, 2),
                             padding=(0, 55, 55))
            )  # here should be 'same' padding; hard code for now.
            # shape is (1, 64, 37, 109, 109)

            self.features.add(_make_unit3d(channels=64, kernel_size=(1, 1, 1)))
            # shape (1, 64, 37, 109, 109)

            self.features.add(_make_unit3d(channels=192,
                                           kernel_size=(3, 3, 3)))
            # shape (1, 192, 35, 107, 107)

            self.features.add(
                nn.MaxPool3D(pool_size=(1, 3, 3),
                             strides=(1, 2, 2),
                             padding=(0, 54, 54)))  # padding same
            # shape (1, 192, 35, 107, 107)

            self.features.add(_make_mixed_3b('mixed_3b'))

            self.features.add(_make_mixed_3c('mixed_3c'))
            #(1, 480, 35, 107, 107)

            self.features.add(
                nn.MaxPool3D(pool_size=(3, 3, 3),
                             strides=(2, 2, 2),
                             padding=(18, 54, 54)))  # padding is same here

            self.features.add(_make_mixed_4b('mixed_4b'))
            #
            self.features.add(_make_mixed_4c('mixed_4c'))

            self.features.add(_make_mixed_4d('mixed_4d'))

            self.features.add(_make_mixed_4e('mixed_4e'))

            self.features.add(_make_mixed_4f('mixed_4f'))
            # (1, 384, 35, 107, 107)

            self.features.add(
                nn.MaxPool3D(pool_size=(2, 2, 2),
                             strides=(2, 2, 2),
                             padding=(18, 54, 54)))

            self.features.add(_make_mixed_5b('mixed_5b'))

            self.features.add(_make_mixed_5c('mixed_5c'))

            self.features.add(nn.AvgPool3D(pool_size=(2, 7, 7)))

            # nn.Dropout expects the drop rate, whereas dropout_keep_prob is a keep probability
            self.features.add(nn.Dropout(1 - self.dropout_keep_prob))

            self.features.add(
                _make_unit3d(channels=self._num_classes,
                             kernel_size=(1, 1, 1)))

            # logits/main classifier outputs endpoint
            self.output = nn.HybridSequential(prefix='')
            self.output.add(nn.Flatten())
            self.output.add(nn.Dense(self._num_classes))
Example 11
    def __init__(self,
                 channels,
                 init_block_channels,
                 final_block_channels,
                 final_block_groups,
                 dilations,
                 dropout_rate=0.2,
                 bn_use_global_stats=False,
                 in_channels=3,
                 in_size=(224, 224),
                 classes=1000):
        super(ESPNetv2, self).__init__()
        self.in_size = in_size
        self.classes = classes
        x0_channels = in_channels

        with self.name_scope():
            self.features = DualPathSequential(
                return_two=False,
                first_ordinals=0,
                last_ordinals=2,
                prefix="")
            self.features.add(ESPInitBlock(
                in_channels=in_channels,
                out_channels=init_block_channels,
                bn_use_global_stats=bn_use_global_stats))
            in_channels = init_block_channels
            for i, channels_per_stage in enumerate(channels):
                stage = DualPathSequential(prefix="stage{}_".format(i + 1))
                for j, out_channels in enumerate(channels_per_stage):
                    if j == 0:
                        unit = DownsampleBlock(
                            in_channels=in_channels,
                            out_channels=out_channels,
                            x0_channels=x0_channels,
                            dilations=dilations[i][j],
                            bn_use_global_stats=bn_use_global_stats)
                    else:
                        unit = ESPBlock(
                            in_channels=in_channels,
                            out_channels=out_channels,
                            strides=1,
                            dilations=dilations[i][j],
                            bn_use_global_stats=bn_use_global_stats)
                    stage.add(unit)
                    in_channels = out_channels
                self.features.add(stage)
            self.features.add(ESPFinalBlock(
                in_channels=in_channels,
                out_channels=final_block_channels,
                final_groups=final_block_groups,
                bn_use_global_stats=bn_use_global_stats))
            in_channels = final_block_channels
            self.features.add(nn.AvgPool2D(
                pool_size=7,
                strides=1))

            self.output = nn.HybridSequential(prefix="")
            self.output.add(nn.Flatten())
            self.output.add(nn.Dropout(rate=dropout_rate))
            self.output.add(nn.Dense(
                units=classes,
                in_units=in_channels))
Example 12
def get_net():
    net = nn.Sequential()
    with net.name_scope():
        net.add(nn.Flatten(), nn.Dense(120, activation="relu"),
                nn.Dense(84, activation="relu"), nn.Dense(10))
    return net
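
# A quick smoke test of get_net (assumes `import mxnet as mx`; illustrative only):
net = get_net()
net.initialize()
print(net(mx.nd.zeros((1, 1, 28, 28))).shape)  # expected: (1, 10)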
Example 13
    def __init__(self,
                 layers,
                 stem_width=32,
                 dropblock_prob=0.0,
                 final_drop=0.0,
                 input_size=224,
                 in_channels=3,
                 in_size=(224, 224),
                 classes=1000):
        self.in_size = in_size
        self.classes = classes

        block = Bottleneck
        avg_down = True
        cardinality = 1
        avd = True
        avd_first = False
        use_splat = True
        bottleneck_width = 64
        radix = 2
        split_drop_ratio = 0

        dilated = False
        dilation = 1
        norm_layer = BatchNorm
        norm_kwargs = None
        last_gamma = False

        self.cardinality = cardinality
        self.bottleneck_width = bottleneck_width
        self.inplanes = stem_width * 2
        self.radix = radix
        self.split_drop_ratio = split_drop_ratio
        self.avd_first = avd_first
        super(ResNeSt, self).__init__(prefix='resnest_')
        norm_kwargs = norm_kwargs if norm_kwargs is not None else {}
        self.norm_kwargs = norm_kwargs
        with self.name_scope():
            self.conv1 = nn.HybridSequential(prefix='conv1')
            self.conv1.add(
                nn.Conv2D(channels=stem_width,
                          kernel_size=3,
                          strides=2,
                          padding=1,
                          use_bias=False,
                          in_channels=3))
            self.conv1.add(norm_layer(in_channels=stem_width, **norm_kwargs))
            self.conv1.add(nn.Activation('relu'))
            self.conv1.add(
                nn.Conv2D(channels=stem_width,
                          kernel_size=3,
                          strides=1,
                          padding=1,
                          use_bias=False,
                          in_channels=stem_width))
            self.conv1.add(norm_layer(in_channels=stem_width, **norm_kwargs))
            self.conv1.add(nn.Activation('relu'))
            self.conv1.add(
                nn.Conv2D(channels=stem_width * 2,
                          kernel_size=3,
                          strides=1,
                          padding=1,
                          use_bias=False,
                          in_channels=stem_width))

            input_size = _update_input_size(input_size, 2)
            self.bn1 = norm_layer(in_channels=stem_width * 2, **norm_kwargs)
            self.relu = nn.Activation('relu')
            self.maxpool = nn.MaxPool2D(pool_size=3, strides=2, padding=1)
            input_size = _update_input_size(input_size, 2)
            self.layer1 = self._make_layer(1,
                                           block,
                                           64,
                                           layers[0],
                                           avg_down=avg_down,
                                           norm_layer=norm_layer,
                                           last_gamma=last_gamma,
                                           use_splat=use_splat,
                                           avd=avd)
            self.layer2 = self._make_layer(2,
                                           block,
                                           128,
                                           layers[1],
                                           strides=2,
                                           avg_down=avg_down,
                                           norm_layer=norm_layer,
                                           last_gamma=last_gamma,
                                           use_splat=use_splat,
                                           avd=avd)
            input_size = _update_input_size(input_size, 2)
            if dilated or dilation == 4:
                self.layer3 = self._make_layer(3,
                                               block,
                                               256,
                                               layers[2],
                                               strides=1,
                                               dilation=2,
                                               avg_down=avg_down,
                                               norm_layer=norm_layer,
                                               last_gamma=last_gamma,
                                               dropblock_prob=dropblock_prob,
                                               input_size=input_size,
                                               use_splat=use_splat,
                                               avd=avd)
                self.layer4 = self._make_layer(4,
                                               block,
                                               512,
                                               layers[3],
                                               strides=1,
                                               dilation=4,
                                               pre_dilation=2,
                                               avg_down=avg_down,
                                               norm_layer=norm_layer,
                                               last_gamma=last_gamma,
                                               dropblock_prob=dropblock_prob,
                                               input_size=input_size,
                                               use_splat=use_splat,
                                               avd=avd)
            elif dilation == 3:
                # special
                self.layer3 = self._make_layer(3,
                                               block,
                                               256,
                                               layers[2],
                                               strides=1,
                                               dilation=2,
                                               avg_down=avg_down,
                                               norm_layer=norm_layer,
                                               last_gamma=last_gamma,
                                               dropblock_prob=dropblock_prob,
                                               input_size=input_size,
                                               use_splat=use_splat,
                                               avd=avd)
                self.layer4 = self._make_layer(4,
                                               block,
                                               512,
                                               layers[3],
                                               strides=2,
                                               dilation=2,
                                               pre_dilation=2,
                                               avg_down=avg_down,
                                               norm_layer=norm_layer,
                                               last_gamma=last_gamma,
                                               dropblock_prob=dropblock_prob,
                                               input_size=input_size,
                                               use_splat=use_splat,
                                               avd=avd)
            elif dilation == 2:
                self.layer3 = self._make_layer(3,
                                               block,
                                               256,
                                               layers[2],
                                               strides=2,
                                               avg_down=avg_down,
                                               norm_layer=norm_layer,
                                               last_gamma=last_gamma,
                                               dropblock_prob=dropblock_prob,
                                               input_size=input_size,
                                               use_splat=use_splat,
                                               avd=avd)
                self.layer4 = self._make_layer(4,
                                               block,
                                               512,
                                               layers[3],
                                               strides=1,
                                               dilation=2,
                                               avg_down=avg_down,
                                               norm_layer=norm_layer,
                                               last_gamma=last_gamma,
                                               dropblock_prob=dropblock_prob,
                                               input_size=input_size,
                                               use_splat=use_splat,
                                               avd=avd)
            else:
                self.layer3 = self._make_layer(3,
                                               block,
                                               256,
                                               layers[2],
                                               strides=2,
                                               avg_down=avg_down,
                                               norm_layer=norm_layer,
                                               last_gamma=last_gamma,
                                               dropblock_prob=dropblock_prob,
                                               input_size=input_size,
                                               use_splat=use_splat,
                                               avd=avd)
                input_size = _update_input_size(input_size, 2)
                self.layer4 = self._make_layer(4,
                                               block,
                                               512,
                                               layers[3],
                                               strides=2,
                                               avg_down=avg_down,
                                               norm_layer=norm_layer,
                                               last_gamma=last_gamma,
                                               dropblock_prob=dropblock_prob,
                                               input_size=input_size,
                                               use_splat=use_splat,
                                               avd=avd)
                input_size = _update_input_size(input_size, 2)
            self.avgpool = nn.GlobalAvgPool2D()
            self.flat = nn.Flatten()
            self.drop = None
            if final_drop > 0.0:
                self.drop = nn.Dropout(final_drop)
            self.fc = nn.Dense(in_units=512 * block.expansion, units=classes)
Example no. 14
    def __init__(self,
                 in_channels,
                 num_filter,
                 ratio,
                 strides,
                 dim_match,
                 name,
                 num_group,
                 bn_mom=0.9,
                 **kwargs):
        super(residual_unit, self).__init__(**kwargs)
        self.dim_match = dim_match
        self.num_filter = num_filter
        # block 1
        self.conv1 = nn.Conv2D(in_channels=in_channels,
                               channels=int(num_filter * 0.5),
                               kernel_size=(1, 1),
                               strides=(1, 1),
                               padding=(0, 0),
                               use_bias=False,
                               prefix=name + '_conv1_')
        self.bn1 = nn.BatchNorm(in_channels=int(num_filter * 0.5),
                                epsilon=2e-5,
                                momentum=bn_mom,
                                prefix=name + '_batchnorm1_')
        self.bn1min = nn.BatchNorm(in_channels=int(num_filter * 0.5),
                                   epsilon=2e-5,
                                   momentum=bn_mom,
                                   prefix=name + '_batchnorm1min_')
        self.relu1 = nn.Activation(activation='relu', prefix=name + '_relu1_')
        self.relu1min = NReLu(prefix=name + '_relu1min_')

        # block 2
        self.conv2 = nn.Conv2D(in_channels=int(num_filter * 0.5),
                               channels=int(num_filter * 0.5),
                               groups=num_group,
                               kernel_size=(3, 3),
                               strides=strides,
                               padding=(1, 1),
                               use_bias=False,
                               prefix=name + '_conv2_')
        self.bn2 = nn.BatchNorm(in_channels=int(num_filter * 0.5),
                                epsilon=2e-5,
                                momentum=bn_mom,
                                prefix=name + '_batchnorm2_')
        self.bn2min = nn.BatchNorm(in_channels=int(num_filter * 0.5),
                                   epsilon=2e-5,
                                   momentum=bn_mom,
                                   prefix=name + '_batchnorm2min_')
        self.relu2 = nn.Activation(activation='relu', prefix=name + '_relu2_')
        self.relu2min = NReLu(prefix=name + '_relu2min_')

        # block 3
        self.conv3 = nn.Conv2D(in_channels=int(num_filter * 0.5),
                               channels=num_filter,
                               kernel_size=(1, 1),
                               strides=(1, 1),
                               padding=(0, 0),
                               use_bias=False,
                               prefix=name + '_conv3_')
        self.bn3 = nn.BatchNorm(in_channels=num_filter,
                                epsilon=2e-5,
                                momentum=bn_mom,
                                prefix=name + '_batchnorm3_')
        self.bn3min = nn.BatchNorm(in_channels=num_filter,
                                   epsilon=2e-5,
                                   momentum=bn_mom,
                                   prefix=name + '_batchnorm3min_')

        # squeeze
        self.pool = nn.GlobalAvgPool2D(prefix=name + '_squeeze_')
        self.flatten = nn.Flatten(prefix=name + '_flatten_')

        # excitation 1
        self.fc1 = nn.Dense(units=int(num_filter * ratio),
                            in_units=num_filter,
                            prefix=name + '_excitation1_dense_')
        self.reluex1 = nn.Activation(activation='relu',
                                     prefix=name + '_excitation1_relu_')
        self.reluex1min = NReLu(prefix=name + '_excitation1_relumin_')

        # excitation 2
        self.fc2 = nn.Dense(units=num_filter,
                            in_units=int(num_filter * ratio),
                            prefix=name + '_excitation2_dense_')
        self.reluex2 = nn.Activation(activation='sigmoid',
                                     prefix=name + '_excitation2_sigmoid_')
        self.reluex2min = NSigmoid(prefix=name + '_excitation2_sigmoidmin_')

        if not dim_match:
            self.fc_sc = nn.Conv2D(in_channels=in_channels,
                                   channels=num_filter,
                                   kernel_size=(1, 1),
                                   strides=strides,
                                   use_bias=False,
                                   prefix=name + '_sc_dense_')
            self.bn_sc = nn.BatchNorm(in_channels=num_filter,
                                      epsilon=2e-5,
                                      momentum=bn_mom,
                                      prefix=name + '_sc_batchnorm_')
            self.bn_scmin = nn.BatchNorm(in_channels=num_filter,
                                         epsilon=2e-5,
                                         momentum=bn_mom,
                                         prefix=name + '_sc_batchnormmin_')

        self.relu3 = nn.Activation(activation='relu', prefix=name + '_relu3_')
        self.relu3min = NReLu(prefix=name + '_relu3min_')
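Only the layers are declared here; the block's actual forward pass lives elsewhere in the project and also involves the *min (NReLu/NSigmoid) twins, which are project-specific. As a reference point, a minimal sketch of how the standard SE-ResNeXt recalibration would combine the main-path layers defined above; this is purely an illustration of the usual squeeze-and-excitation pattern, not the project's code:

from mxnet import nd

def se_unit_forward(unit, x):
    # assumed standard SE bottleneck, main path only (illustration, not the project's forward)
    out = unit.relu1(unit.bn1(unit.conv1(x)))     # 1x1 reduce
    out = unit.relu2(unit.bn2(unit.conv2(out)))   # 3x3 grouped conv
    out = unit.bn3(unit.conv3(out))               # 1x1 expand
    # squeeze: per-channel global context, shape (N, num_filter)
    w = unit.flatten(unit.pool(out))
    # excitation: bottleneck MLP ending in a sigmoid gate
    w = unit.reluex2(unit.fc2(unit.reluex1(unit.fc1(w))))
    # recalibrate channels, add the (projected) shortcut, final ReLU
    out = nd.broadcast_mul(out, w.reshape((-1, unit.num_filter, 1, 1)))
    shortcut = x if unit.dim_match else unit.bn_sc(unit.fc_sc(x))
    return unit.relu3(out + shortcut)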
Example no. 15
net = nn.Sequential()
# add name_scope on the outermost Sequential
with net.name_scope():
    net.add(
        mlpconv(96, 11, 0, strides=4),
        mlpconv(256, 5, 2),
        mlpconv(384, 3, 1),
        nn.Dropout(.5),
        # there are 10 target classes (10 channels)
        mlpconv(10, 3, 1, max_pooling=False),
        # the input here is batch_size x 10 x 5 x 5; average pooling turns it
        # into batch_size x 10 x 1 x 1.
        # nn.AvgPool2D(pool_size=5) would also work, but global pooling is more
        # convenient since it avoids computing pool_size (see the check below)
        nn.GlobalAvgPool2D(),
        # reshape to batch_size x 10
        nn.Flatten())
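The comment inside the block argues that nn.GlobalAvgPool2D() is simply a convenient substitute for nn.AvgPool2D(pool_size=5) at this point in the network. A quick standalone check on a made-up 10 x 5 x 5 feature map (not part of the original script) confirms the two are interchangeable here:

from mxnet import nd
from mxnet.gluon import nn

x = nd.random.uniform(shape=(2, 10, 5, 5))   # dummy batch_size x 10 x 5 x 5 feature map
gap = nn.GlobalAvgPool2D()
avg = nn.AvgPool2D(pool_size=5)
print((gap(x) - avg(x)).abs().max())          # ~0: same result, no pool_size needed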

##########################################################################
# train

train_data, test_data = utils.load_data_fashion_mnist(batch_size=64,
                                                      resize=224)

ctx = utils.try_gpu()
net.initialize(ctx=ctx, init=init.Xavier())

loss = gluon.loss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.1})
utils.train(train_data, test_data, net, loss, trainer, ctx, num_epochs=1)
Example no. 16
    def __init__(self,
                 light=False,
                 stage_channels=(16, 32, 64, 128),
                 classes=1000,
                 norm_layer=nn.BatchNorm,
                 norm_kwargs=None,
                 activation='prelu',
                 drop=0.,
                 **kwargs):
        super(EPRNetCls, self).__init__()
        width1, width2, width3, width4 = tuple(stage_channels)
        self.stage_channels = stage_channels
        with self.name_scope():
            self.conv = nn.Conv2D(channels=width1,
                                  kernel_size=3,
                                  strides=2,
                                  padding=1,
                                  use_bias=False)
            self.bn = norm_layer(
                **({} if norm_kwargs is None else norm_kwargs))
            self.act = Activation(activation)

            self.layer1 = nn.HybridSequential()
            self.layer1.add(
                _EPRModule(channels=width2,
                           in_channels=width1,
                           atrous_rates=(1, 2, 4),
                           norm_layer=norm_layer,
                           norm_kwargs=norm_kwargs,
                           activation=activation,
                           down_sample=False,
                           light=light),
                _EPRModule(channels=width2,
                           in_channels=width2,
                           atrous_rates=(1, 2, 4),
                           norm_layer=norm_layer,
                           norm_kwargs=norm_kwargs,
                           activation=activation,
                           down_sample=True,
                           light=light))

            self.layer2 = nn.HybridSequential()
            self.layer2.add(
                _EPRModule(channels=width3,
                           in_channels=width2,
                           atrous_rates=(3, 6, 9),
                           norm_layer=norm_layer,
                           norm_kwargs=norm_kwargs,
                           activation=activation,
                           down_sample=False,
                           light=light),
                _EPRModule(channels=width3,
                           in_channels=width3,
                           atrous_rates=(3, 6, 9),
                           norm_layer=norm_layer,
                           norm_kwargs=norm_kwargs,
                           activation=activation,
                           down_sample=False,
                           light=light),
                _EPRModule(channels=width3,
                           in_channels=width3,
                           atrous_rates=(3, 6, 9),
                           norm_layer=norm_layer,
                           norm_kwargs=norm_kwargs,
                           activation=activation,
                           down_sample=False,
                           light=light),
                _EPRModule(channels=width3,
                           in_channels=width3,
                           atrous_rates=(3, 6, 9),
                           norm_layer=norm_layer,
                           norm_kwargs=norm_kwargs,
                           activation=activation,
                           down_sample=True,
                           light=light))

            self.layer3 = nn.HybridSequential()
            self.layer3.add(
                _EPRModule(channels=width4,
                           in_channels=width3,
                           atrous_rates=(7, 13, 19),
                           norm_layer=norm_layer,
                           norm_kwargs=norm_kwargs,
                           activation=activation,
                           down_sample=False,
                           light=light),
                _EPRModule(channels=width4,
                           in_channels=width4,
                           atrous_rates=(13, 25, 37),
                           norm_layer=norm_layer,
                           norm_kwargs=norm_kwargs,
                           activation=activation,
                           down_sample=False,
                           light=light))

            self.avg_pool = nn.GlobalAvgPool2D()
            self.flat = nn.Flatten()
            self.drop = nn.Dropout(drop) if drop > 0. else None
            self.linear = nn.Dense(units=classes)
Example no. 17
    def __init__(self,
                 channels,
                 init_block_channels,
                 final_block_channels,
                 kernel_sizes,
                 strides_per_stage,
                 expansion_factors,
                 dropout_rate=0.2,
                 tf_mode=False,
                 bn_epsilon=1e-5,
                 bn_use_global_stats=False,
                 in_channels=3,
                 in_size=(224, 224),
                 classes=1000,
                 **kwargs):
        super(EfficientNet, self).__init__(**kwargs)
        self.in_size = in_size
        self.classes = classes
        activation = "swish"

        with self.name_scope():
            self.features = nn.HybridSequential(prefix="")
            self.features.add(
                EffiInitBlock(in_channels=in_channels,
                              out_channels=init_block_channels,
                              bn_epsilon=bn_epsilon,
                              bn_use_global_stats=bn_use_global_stats,
                              activation=activation,
                              tf_mode=tf_mode))
            in_channels = init_block_channels
            for i, channels_per_stage in enumerate(channels):
                kernel_sizes_per_stage = kernel_sizes[i]
                expansion_factors_per_stage = expansion_factors[i]
                stage = nn.HybridSequential(prefix="stage{}_".format(i + 1))
                with stage.name_scope():
                    for j, out_channels in enumerate(channels_per_stage):
                        kernel_size = kernel_sizes_per_stage[j]
                        expansion_factor = expansion_factors_per_stage[j]
                        strides = strides_per_stage[i] if (j == 0) else 1
                        if i == 0:
                            stage.add(
                                EffiDwsConvUnit(
                                    in_channels=in_channels,
                                    out_channels=out_channels,
                                    strides=strides,
                                    bn_epsilon=bn_epsilon,
                                    bn_use_global_stats=bn_use_global_stats,
                                    activation=activation,
                                    tf_mode=tf_mode))
                        else:
                            stage.add(
                                EffiInvResUnit(
                                    in_channels=in_channels,
                                    out_channels=out_channels,
                                    kernel_size=kernel_size,
                                    strides=strides,
                                    expansion_factor=expansion_factor,
                                    bn_epsilon=bn_epsilon,
                                    bn_use_global_stats=bn_use_global_stats,
                                    activation=activation,
                                    tf_mode=tf_mode))
                        in_channels = out_channels
                self.features.add(stage)
            self.features.add(
                conv1x1_block(in_channels=in_channels,
                              out_channels=final_block_channels,
                              bn_epsilon=bn_epsilon,
                              bn_use_global_stats=bn_use_global_stats,
                              activation=activation))
            in_channels = final_block_channels
            self.features.add(nn.GlobalAvgPool2D())

            self.output = nn.HybridSequential(prefix="")
            self.output.add(nn.Flatten())
            if dropout_rate > 0.0:
                self.output.add(nn.Dropout(rate=dropout_rate))
            self.output.add(nn.Dense(units=classes, in_units=in_channels))
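The stage-building loop expects channels, kernel_sizes and expansion_factors to be parallel per-stage lists of per-unit values, while strides_per_stage holds one stride per stage that is applied only to the stage's first unit (j == 0); the first stage uses depthwise-separable units, later stages inverted-residual units. A toy, purely hypothetical configuration shows the expected nesting (the real EfficientNet tables are longer):

# hypothetical two-stage configuration, for illustrating the nesting only
channels          = [[16], [24, 24]]   # stage 1: one unit; stage 2: two units
kernel_sizes      = [[3], [3, 3]]      # one kernel size per unit
expansion_factors = [[1], [6, 6]]      # one expansion factor per unit
strides_per_stage = [1, 2]             # applied only to each stage's first unit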
Example no. 18
 def __init__(self,
              block,
              layers,
              classes=1000,
              dilated=False,
              norm_layer=BatchNorm,
              norm_kwargs=None,
              last_gamma=False,
              deep_stem=False,
              stem_width=32,
              avg_down=False,
              final_drop=0.0,
              use_global_stats=False,
              name_prefix='',
              **kwargs):
     self.inplanes = stem_width * 2 if deep_stem else 64
     super(ResNetV1b, self).__init__(prefix=name_prefix)
     norm_kwargs = norm_kwargs if norm_kwargs is not None else {}
     if use_global_stats:
         norm_kwargs['use_global_stats'] = True
     self.norm_kwargs = norm_kwargs
     with self.name_scope():
         if not deep_stem:
             self.conv1 = nn.Conv2D(channels=64,
                                    kernel_size=7,
                                    strides=2,
                                    padding=3,
                                    use_bias=False)
         else:
             self.conv1 = nn.HybridSequential(prefix='conv1')
             self.conv1.add(
                 nn.Conv2D(channels=stem_width,
                           kernel_size=3,
                           strides=2,
                           padding=1,
                           use_bias=False))
             self.conv1.add(
                 norm_layer(in_channels=stem_width, **norm_kwargs))
             self.conv1.add(nn.Activation('relu'))
             self.conv1.add(
                 nn.Conv2D(channels=stem_width,
                           kernel_size=3,
                           strides=1,
                           padding=1,
                           use_bias=False))
             self.conv1.add(
                 norm_layer(in_channels=stem_width, **norm_kwargs))
             self.conv1.add(nn.Activation('relu'))
             self.conv1.add(
                 nn.Conv2D(channels=stem_width * 2,
                           kernel_size=3,
                           strides=1,
                           padding=1,
                           use_bias=False))
         self.bn1 = norm_layer(
             in_channels=64 if not deep_stem else stem_width * 2,
             **norm_kwargs)
         self.relu = nn.Activation('relu')
         self.maxpool = nn.MaxPool2D(pool_size=3, strides=2, padding=1)
         self.layer1 = self._make_layer(1,
                                        block,
                                        64,
                                        layers[0],
                                        avg_down=avg_down,
                                        norm_layer=norm_layer,
                                        last_gamma=last_gamma)
         self.layer2 = self._make_layer(2,
                                        block,
                                        128,
                                        layers[1],
                                        strides=2,
                                        avg_down=avg_down,
                                        norm_layer=norm_layer,
                                        last_gamma=last_gamma)
         if dilated:
             self.layer3 = self._make_layer(3,
                                            block,
                                            256,
                                            layers[2],
                                            strides=1,
                                            dilation=2,
                                            avg_down=avg_down,
                                            norm_layer=norm_layer,
                                            last_gamma=last_gamma)
             self.layer4 = self._make_layer(4,
                                            block,
                                            512,
                                            layers[3],
                                            strides=1,
                                            dilation=4,
                                            avg_down=avg_down,
                                            norm_layer=norm_layer,
                                            last_gamma=last_gamma)
         else:
             self.layer3 = self._make_layer(3,
                                            block,
                                            256,
                                            layers[2],
                                            strides=2,
                                            avg_down=avg_down,
                                            norm_layer=norm_layer,
                                            last_gamma=last_gamma)
             self.layer4 = self._make_layer(4,
                                            block,
                                            512,
                                            layers[3],
                                            strides=2,
                                            avg_down=avg_down,
                                            norm_layer=norm_layer,
                                            last_gamma=last_gamma)
         self.avgpool = nn.GlobalAvgPool2D()
         self.flat = nn.Flatten()
         self.drop = None
         if final_drop > 0.0:
             self.drop = nn.Dropout(final_drop)
         self.fc = nn.Dense(in_units=512 * block.expansion, units=classes)
Example no. 19
    def __init__(self,
                 channels,
                 init_block_channels,
                 final_block_channels,
                 residuals,
                 shortcuts,
                 kernel_sizes,
                 expansions,
                 bn_epsilon=1e-3,
                 bn_use_global_stats=False,
                 in_channels=3,
                 in_size=(224, 224),
                 classes=1000):
        super(ProxylessNAS, self).__init__()
        self.in_size = in_size
        self.classes = classes

        with self.name_scope():
            self.features = nn.HybridSequential(prefix='')
            self.features.add(
                conv3x3_block(in_channels=in_channels,
                              out_channels=init_block_channels,
                              strides=2,
                              bn_epsilon=bn_epsilon,
                              bn_use_global_stats=bn_use_global_stats,
                              activation="relu6"))
            in_channels = init_block_channels
            for i, channels_per_stage in enumerate(channels):
                stage = nn.HybridSequential(prefix="stage{}_".format(i + 1))
                residuals_per_stage = residuals[i]
                shortcuts_per_stage = shortcuts[i]
                kernel_sizes_per_stage = kernel_sizes[i]
                expansions_per_stage = expansions[i]
                with stage.name_scope():
                    for j, out_channels in enumerate(channels_per_stage):
                        residual = (residuals_per_stage[j] == 1)
                        shortcut = (shortcuts_per_stage[j] == 1)
                        kernel_size = kernel_sizes_per_stage[j]
                        expansion = expansions_per_stage[j]
                        strides = 2 if (j == 0) and (i != 0) else 1
                        stage.add(
                            ProxylessUnit(
                                in_channels=in_channels,
                                out_channels=out_channels,
                                kernel_size=kernel_size,
                                strides=strides,
                                bn_epsilon=bn_epsilon,
                                bn_use_global_stats=bn_use_global_stats,
                                expansion=expansion,
                                residual=residual,
                                shortcut=shortcut))
                        in_channels = out_channels
                self.features.add(stage)
            self.features.add(
                conv1x1_block(in_channels=in_channels,
                              out_channels=final_block_channels,
                              bn_epsilon=bn_epsilon,
                              bn_use_global_stats=bn_use_global_stats,
                              activation="relu6"))
            in_channels = final_block_channels
            self.features.add(nn.AvgPool2D(pool_size=7, strides=1))

            self.output = nn.HybridSequential(prefix='')
            self.output.add(nn.Flatten())
            self.output.add(nn.Dense(units=classes, in_units=in_channels))
Example no. 20
    h4_linear = nd.dot(h3, lenet_W4) + lenet_b4
    if verbose:
        print('1st conv block:', h1.shape)
        print('2nd conv block:', h2.shape)
        print('1st dense:', h3.shape)
        print('2nd dense:', h4_linear.shape)
        print('output:', h4_linear)
    return h4_linear


lenet = nn.Sequential()
with lenet.name_scope():
    lenet.add(nn.Conv2D(channels=20, kernel_size=5, activation='relu'),
              nn.MaxPool2D(pool_size=2, strides=2),
              nn.Conv2D(channels=50, kernel_size=3, activation='relu'),
              nn.MaxPool2D(pool_size=2, strides=2), nn.Flatten(),
              nn.Dense(128, activation="relu"), nn.Dense(10))
lenet.initialize(ctx=ctx)

arch_A = ((1, 64), (1, 128), (2, 256), (2, 512), (2, 512))
arch_B = ((2, 64), (2, 128), (2, 256), (2, 512), (2, 512))
arch_D = ((2, 64), (2, 128), (3, 256), (3, 512), (3, 512))
arch_E = ((2, 64), (2, 128), (4, 256), (4, 512), (4, 512))


def vgg_stack(arch):
    out = nn.Sequential()
    for (num_convs, channels) in arch:
        seq = nn.Sequential()
        for _ in range(num_convs):
            seq.add(nn.Conv2D(channels=channels, kernel_size=3,
                              padding=1, activation='relu'))
        # standard VGG stage ending (assumed; the original snippet was truncated here)
        seq.add(nn.MaxPool2D(pool_size=2, strides=2))
        out.add(seq)
    return out
Example no. 21
    def __init__(self,
                 direct_channels,
                 skip_channels,
                 init_block_channels,
                 bn_use_global_stats=False,
                 in_channels=3,
                 in_size=(224, 224),
                 classes=1000,
                 **kwargs):
        super(FishNet, self).__init__(**kwargs)
        self.in_size = in_size
        self.classes = classes

        depth = len(direct_channels[0])
        down1_channels = direct_channels[0]
        up_channels = direct_channels[1]
        down2_channels = direct_channels[2]
        skip1_channels = skip_channels[0]
        skip2_channels = skip_channels[1]

        with self.name_scope():
            self.features = nn.HybridSequential(prefix='')
            self.features.add(
                SEInitBlock(in_channels=in_channels,
                            out_channels=init_block_channels,
                            bn_use_global_stats=bn_use_global_stats))
            in_channels = init_block_channels

            down1_seq = nn.HybridSequential(prefix='')
            skip1_seq = nn.HybridSequential(prefix='')
            for i in range(depth + 1):
                skip1_channels_list = skip1_channels[i]
                if i < depth:
                    skip1_seq.add(
                        SkipUnit(in_channels=in_channels,
                                 out_channels_list=skip1_channels_list,
                                 bn_use_global_stats=bn_use_global_stats))
                    down1_channels_list = down1_channels[i]
                    down1_seq.add(
                        DownUnit(in_channels=in_channels,
                                 out_channels_list=down1_channels_list,
                                 bn_use_global_stats=bn_use_global_stats))
                    in_channels = down1_channels_list[-1]
                else:
                    skip1_seq.add(
                        SkipAttUnit(in_channels=in_channels,
                                    out_channels_list=skip1_channels_list,
                                    bn_use_global_stats=bn_use_global_stats))
                    in_channels = skip1_channels_list[-1]

            up_seq = nn.HybridSequential(prefix='')
            skip2_seq = nn.HybridSequential(prefix='')
            for i in range(depth + 1):
                skip2_channels_list = skip2_channels[i]
                if i > 0:
                    in_channels += skip1_channels[depth - i][-1]
                if i < depth:
                    skip2_seq.add(
                        SkipUnit(in_channels=in_channels,
                                 out_channels_list=skip2_channels_list,
                                 bn_use_global_stats=bn_use_global_stats))
                    up_channels_list = up_channels[i]
                    dilation = 2**i
                    up_seq.add(
                        UpUnit(in_channels=in_channels,
                               out_channels_list=up_channels_list,
                               dilation=dilation,
                               bn_use_global_stats=bn_use_global_stats))
                    in_channels = up_channels_list[-1]
                else:
                    skip2_seq.add(Identity())

            down2_seq = nn.HybridSequential(prefix='')
            for i in range(depth):
                down2_channels_list = down2_channels[i]
                down2_seq.add(
                    DownUnit(in_channels=in_channels,
                             out_channels_list=down2_channels_list,
                             bn_use_global_stats=bn_use_global_stats))
                in_channels = (down2_channels_list[-1] +
                               skip2_channels[depth - 1 - i][-1])

            self.features.add(
                SesquialteralHourglass(down1_seq=down1_seq,
                                       skip1_seq=skip1_seq,
                                       up_seq=up_seq,
                                       skip2_seq=skip2_seq,
                                       down2_seq=down2_seq))
            self.features.add(
                FishFinalBlock(in_channels=in_channels,
                               bn_use_global_stats=bn_use_global_stats))
            in_channels = in_channels // 2
            self.features.add(nn.AvgPool2D(pool_size=7, strides=1))

            self.output = nn.HybridSequential(prefix='')
            self.output.add(
                conv1x1(in_channels=in_channels,
                        out_channels=classes,
                        use_bias=True))
            self.output.add(nn.Flatten())
Example no. 22
                                 batch_size=batch_size,
                                 shuffle=False)

# model
net = nn.Sequential()
#net.add(
#        nn.Dense(500,activation='relu'),
#        nn.Dense(256,activation='relu'),
#        nn.Dropout(dropout_rate),
#        nn.Dense(out_put_num,activation='sigmoid')
#    )
net.add(nn.Conv1D(8, kernel_size=5, activation='relu'),
        nn.Conv1D(16, kernel_size=5, activation='relu'),
        nn.BatchNorm(momentum=0.8), nn.MaxPool1D(pool_size=2),
        nn.Conv1D(16, kernel_size=1, activation='relu'),
        nn.Conv1D(16, kernel_size=5, activation='relu'), nn.Flatten(),
        nn.Dense(256, activation='relu'), nn.Dropout(0.25),
        nn.Dense(out_put_num, activation='relu'))
net.initialize(mx.init.Xavier(magnitude=2.24))
#net.initialize(mx.init.MSRAPrelu())
#net.initialize(mx.init.Normal(0.5) ,ctx=ctx)
#net.load_parameters(para_filepath)
net.collect_params().reset_ctx(ctx)

# solve
loss = gloss.SoftmaxCrossEntropyLoss()
metric = mx.metric.Accuracy()


def test():
    metric = mx.metric.Accuracy()
Example no. 23
    def __init__(self,
                 block,
                 layers,
                 cardinality=1,
                 bottleneck_width=64,
                 classes=1000,
                 dilated=False,
                 dilation=1,
                 norm_layer=nn.BatchNorm,
                 norm_kwargs=None,
                 last_gamma=False,
                 deep_stem=False,
                 stem_width=32,
                 avg_down=False,
                 final_drop=0.0,
                 use_global_stats=False,
                 name_prefix='',
                 dropblock_prob=0,
                 input_size=224,
                 use_splat=False,
                 radix=2,
                 avd=False,
                 avd_first=False,
                 split_drop_ratio=0):
        self.cardinality = cardinality
        self.bottleneck_width = bottleneck_width
        self.inplanes = stem_width * 2 if deep_stem else 64
        self.radix = radix
        self.split_drop_ratio = split_drop_ratio
        self.avd_first = avd_first
        super(ResNet, self).__init__(prefix=name_prefix)
        norm_kwargs = norm_kwargs if norm_kwargs is not None else {}
        if use_global_stats:
            norm_kwargs['use_global_stats'] = True
        self.norm_kwargs = norm_kwargs
        with self.name_scope():
            if not deep_stem:
                # use 3*3 with stride1 instead of 7*7
                self.conv1 = nn.Conv2D(channels=64,
                                       kernel_size=3,
                                       strides=1,
                                       padding=1,
                                       use_bias=False,
                                       in_channels=3)
            else:
                self.conv1 = nn.HybridSequential(prefix='conv1')
                self.conv1.add(
                    nn.Conv2D(channels=stem_width,
                              kernel_size=3,
                              strides=1,
                              padding=1,
                              use_bias=False,
                              in_channels=3))
                self.conv1.add(
                    norm_layer(in_channels=stem_width, **norm_kwargs))
                self.conv1.add(gluon_act(config.net_act))
                self.conv1.add(
                    nn.Conv2D(channels=stem_width,
                              kernel_size=3,
                              strides=1,
                              padding=1,
                              use_bias=False,
                              in_channels=stem_width))
                self.conv1.add(
                    norm_layer(in_channels=stem_width, **norm_kwargs))
                self.conv1.add(gluon_act(config.net_act))
                self.conv1.add(
                    nn.Conv2D(channels=stem_width * 2,
                              kernel_size=3,
                              strides=1,
                              padding=1,
                              use_bias=False,
                              in_channels=stem_width))

            self.bn1 = norm_layer(
                in_channels=64 if not deep_stem else stem_width * 2,
                **norm_kwargs)
            self.relu = gluon_act(config.net_act)

            # stage 1
            self.layer1 = self._make_layer(1,
                                           block,
                                           64,
                                           layers[0],
                                           strides=2,
                                           avg_down=avg_down,
                                           norm_layer=norm_layer,
                                           last_gamma=last_gamma,
                                           use_splat=use_splat,
                                           avd=avd)
            input_size = _update_input_size(input_size, 2)

            # stage 2
            self.layer2 = self._make_layer(2,
                                           block,
                                           128,
                                           layers[1],
                                           strides=2,
                                           avg_down=avg_down,
                                           norm_layer=norm_layer,
                                           last_gamma=last_gamma,
                                           use_splat=use_splat,
                                           avd=avd)
            input_size = _update_input_size(input_size, 2)

            # stage3 ~ stage4
            if dilated or dilation == 4:
                self.layer3 = self._make_layer(3,
                                               block,
                                               256,
                                               layers[2],
                                               strides=1,
                                               dilation=2,
                                               avg_down=avg_down,
                                               norm_layer=norm_layer,
                                               last_gamma=last_gamma,
                                               dropblock_prob=dropblock_prob,
                                               input_size=input_size,
                                               use_splat=use_splat,
                                               avd=avd)
                self.layer4 = self._make_layer(4,
                                               block,
                                               512,
                                               layers[3],
                                               strides=1,
                                               dilation=4,
                                               pre_dilation=2,
                                               avg_down=avg_down,
                                               norm_layer=norm_layer,
                                               last_gamma=last_gamma,
                                               dropblock_prob=dropblock_prob,
                                               input_size=input_size,
                                               use_splat=use_splat,
                                               avd=avd)
            elif dilation == 3:
                # special
                self.layer3 = self._make_layer(3,
                                               block,
                                               256,
                                               layers[2],
                                               strides=1,
                                               dilation=2,
                                               avg_down=avg_down,
                                               norm_layer=norm_layer,
                                               last_gamma=last_gamma,
                                               dropblock_prob=dropblock_prob,
                                               input_size=input_size,
                                               use_splat=use_splat,
                                               avd=avd)
                self.layer4 = self._make_layer(4,
                                               block,
                                               512,
                                               layers[3],
                                               strides=2,
                                               dilation=2,
                                               pre_dilation=2,
                                               avg_down=avg_down,
                                               norm_layer=norm_layer,
                                               last_gamma=last_gamma,
                                               dropblock_prob=dropblock_prob,
                                               input_size=input_size,
                                               use_splat=use_splat,
                                               avd=avd)
            elif dilation == 2:
                self.layer3 = self._make_layer(3,
                                               block,
                                               256,
                                               layers[2],
                                               strides=2,
                                               avg_down=avg_down,
                                               norm_layer=norm_layer,
                                               last_gamma=last_gamma,
                                               dropblock_prob=dropblock_prob,
                                               input_size=input_size,
                                               use_splat=use_splat,
                                               avd=avd)
                self.layer4 = self._make_layer(4,
                                               block,
                                               512,
                                               layers[3],
                                               strides=1,
                                               dilation=2,
                                               avg_down=avg_down,
                                               norm_layer=norm_layer,
                                               last_gamma=last_gamma,
                                               dropblock_prob=dropblock_prob,
                                               input_size=input_size,
                                               use_splat=use_splat,
                                               avd=avd)
            else:
                self.layer3 = self._make_layer(3,
                                               block,
                                               256,
                                               layers[2],
                                               strides=2,
                                               avg_down=avg_down,
                                               norm_layer=norm_layer,
                                               last_gamma=last_gamma,
                                               dropblock_prob=dropblock_prob,
                                               input_size=input_size,
                                               use_splat=use_splat,
                                               avd=avd)
                input_size = _update_input_size(input_size, 2)
                self.layer4 = self._make_layer(4,
                                               block,
                                               512,
                                               layers[3],
                                               strides=2,
                                               avg_down=avg_down,
                                               norm_layer=norm_layer,
                                               last_gamma=last_gamma,
                                               dropblock_prob=dropblock_prob,
                                               input_size=input_size,
                                               use_splat=use_splat,
                                               avd=avd)
                input_size = _update_input_size(input_size, 2)

            self.flat = nn.Flatten()
Example no. 24
def train_mnist():
    # Select a fixed random seed for reproducibility
    mx.random.seed(42)

    if version == '':
        net = nn.HybridSequential(prefix='DApp_')
        with net.name_scope():
            net.add(
                nn.Conv2D(channels=16, kernel_size=(3, 3), activation='relu'),
                nn.MaxPool2D(pool_size=(2, 2), strides=(1, 1)),
                nn.Conv2D(channels=32, kernel_size=(3, 3), activation='relu'),
                nn.MaxPool2D(pool_size=(2, 2), strides=(1, 1)),
                nn.Conv2D(channels=64, kernel_size=(3, 3), activation='relu'),
                nn.MaxPool2D(pool_size=(2, 2), strides=(2, 2)),
                nn.Conv2D(channels=128, kernel_size=(1, 1), activation='relu'),
                nn.MaxPool2D(pool_size=(2, 2), strides=(2, 2)),
                nn.Flatten(),
                nn.Dense(10, activation=None),
            )
    elif version == 'lenet':
        net = nn.HybridSequential(prefix='LeNet_')
        with net.name_scope():
            net.add(
                nn.Conv2D(channels=20, kernel_size=(5, 5), activation='relu'),
                nn.MaxPool2D(pool_size=(2, 2), strides=(2, 2)),
                nn.Conv2D(channels=50, kernel_size=(5, 5), activation='relu'),
                nn.MaxPool2D(pool_size=(2, 2), strides=(2, 2)),
                nn.Flatten(),
                nn.Dense(500, activation='relu'),
                nn.Dense(10, activation=None),
            )
    elif version == 'mlp':
        net = nn.HybridSequential(prefix='MLP_')
        with net.name_scope():
            net.add(
                nn.Flatten(),
                nn.Dense(128, activation='relu'),
                nn.Dense(64, activation='relu'),
                nn.Dense(10, activation=None)  # loss function includes softmax already, see below
            )

    net.initialize(mx.init.Xavier(), ctx=ctx)
    net.summary(nd.zeros((1, 1, 28, 28), ctx=ctx))

    trainer = gluon.Trainer(
        params=net.collect_params(),
        optimizer='adam',
        optimizer_params={'learning_rate': 1e-3},
    )
    metric = mx.metric.Accuracy()
    loss_function = gluon.loss.SoftmaxCrossEntropyLoss()
    num_epochs = 10

    for epoch in range(num_epochs):
        for inputs, labels in train_loader:
            inputs = inputs.as_in_context(ctx)
            labels = labels.as_in_context(ctx)

            with autograd.record():
                outputs = net(inputs)
                loss = loss_function(outputs, labels)

            loss.backward()
            metric.update(labels, outputs)

            trainer.step(batch_size=inputs.shape[0])

        name, acc = metric.get()
        print('After epoch {}: {} = {:5.2%}'.format(epoch + 1, name, acc))
        metric.reset()

    for inputs, labels in val_loader:
        inputs = inputs.as_in_context(ctx)
        labels = labels.as_in_context(ctx)
        metric.update(labels, net(inputs))
    print('Validation: {} = {}'.format(*metric.get()))
    assert metric.get()[1] > 0.96

    sym = net(mx.sym.var('data'))
    sym_file, param_file = load_fname(version)
    open(sym_file, "w").write(sym.tojson())
    net.collect_params().save(param_file)
Example no. 25
def get_block(block_mode='conv', act_mode='relu', use_se=False):
    if block_mode == 'just-conv':
        net = gluon.nn.HybridSequential()
        net.add(
            nn.Conv2D(16, kernel_size=3, strides=2,
                      padding=1, use_bias=False, prefix='1st_conv_'),
            nn.BatchNorm(momentum=0.1),
            Activation(act_mode)
        )
        if use_se:
            net.add(SE(16))
        net.add(
            nn.Conv2D(32, in_channels=16, kernel_size=3, strides=2,
                      padding=1, use_bias=False, prefix='2nd_conv_'),
            nn.BatchNorm(momentum=0.1),
            Activation(act_mode)
        )
    elif block_mode == 'SNB':
        net = gluon.nn.HybridSequential()
        net.add(
            nn.Conv2D(16, kernel_size=3, strides=2,
                      padding=1, use_bias=False, prefix='1st_conv_'),
            nn.BatchNorm(momentum=0.1),
            Activation(act_mode)
        )
        if use_se:
            net.add(SE(16))
        net.add(
            ShuffleNetBlock(16, 32, 16, bn=nn.BatchNorm,
                            block_mode='ShuffleNetV2', ksize=3, stride=1,
                            use_se=use_se, act_name=act_mode)
        )
    elif block_mode == 'SNB-x':
        net = gluon.nn.HybridSequential()
        net.add(
            nn.Conv2D(16, kernel_size=3, strides=2,
                      padding=1, use_bias=False, prefix='1st_conv_'),
            nn.BatchNorm(momentum=0.1),
            Activation(act_mode)
        )
        if use_se:
            net.add(SE(16))
        net.add(
            ShuffleNetBlock(16, 32, 16, bn=nn.BatchNorm,
                            block_mode='ShuffleXception', ksize=3, stride=1,
                            use_se=use_se, act_name=act_mode)
        )
    elif block_mode == 'ShuffleNas_fixArch':
        architecture = [0, 0, 0, 0, 0, 0, 1, 1, 2, 0, 1, 1, 0, 0, 1, 2, 2, 0, 2, 0]
        scale_ids = [8, 6, 5, 7, 6, 7, 3, 4, 2, 4, 2, 3, 4, 3, 6, 7, 5, 3, 4, 6]
        net = get_shufflenas_oneshot(architecture=architecture, scale_ids=scale_ids,
                                     use_se=True, last_conv_after_pooling=True)
    else:
        raise ValueError("Unrecognized mode: {}".format(block_mode))

    if block_mode != 'ShuffleNas_fixArch':
        net.add(nn.GlobalAvgPool2D(),
                nn.Conv2D(10, in_channels=32, kernel_size=1, strides=1,
                          padding=0, use_bias=True),
                nn.Flatten()
                )
    else:
        net.output = nn.HybridSequential(prefix='output_')
        with net.output.name_scope():
            net.output.add(
                nn.Conv2D(10, in_channels=1024, kernel_size=1, strides=1,
                          padding=0, use_bias=True),
                nn.Flatten()
            )
    return net
Example no. 26
    def __init__(self,
                 net_name,
                 batch_size,
                 num_class,
                 use_bias=False,
                 use_bn=False,
                 do_topdown=False,
                 do_countpath=False,
                 do_pn=False,
                 relu_td=False,
                 do_nn=False):
        super(NRM, self).__init__()
        self.num_class = num_class
        self.do_topdown = do_topdown
        self.do_countpath = do_countpath
        self.do_pn = do_pn
        self.relu_td = relu_td
        self.do_nn = do_nn
        self.use_bn = use_bn
        self.use_bias = use_bias
        self.batch_size = batch_size
        self.features, layers_drm, layers_drm_cp = self._make_layers(
            cfg[net_name], use_bias, use_bn, self.do_topdown,
            self.do_countpath)
        with self.name_scope():
            self.classifier = nn.HybridSequential(prefix='classifier_')
            conv_layer = nn.Conv2D(in_channels=cfg[net_name][-2],
                                   channels=self.num_class,
                                   kernel_size=(1, 1),
                                   use_bias=True)
            self.classifier.add(conv_layer)
            self.classifier.add(nn.Flatten())

        if self.do_topdown:
            layers_drm += [
                nn.Conv2DTranspose(channels=cfg[net_name][-2],
                                   in_channels=self.num_class,
                                   kernel_size=(1, 1),
                                   strides=(1, 1),
                                   use_bias=False,
                                   params=conv_layer.params),
                Reshape(shape=(self.num_class, 1, 1))
            ]
            with self.name_scope():
                self.drm = nn.HybridSequential(prefix='drmtd_')
                for block in layers_drm[::-1]:
                    self.drm.add(block)
            if self.do_pn:
                with self.name_scope():
                    self.insnorms = nn.HybridSequential(prefix='instancenorm_')
                    for i in range(len(self.drm._children)):
                        if (self.drm._children[i].name.find('batchnorm') !=
                                -1) and (i < (len(self.drm._children) - 1)):
                            self.insnorms.add(InstanceNorm())
                with self.name_scope():
                    self.insnorms_fw = nn.HybridSequential(
                        prefix='instancenormfw_')
                    for i in range(len(self.features._children)):
                        if (self.features._children[i].name.find('batchnorm')
                                != -1):
                            self.insnorms_fw.add(InstanceNorm())

        if self.do_countpath:
            layers_drm_cp += [
                nn.Conv2DTranspose(channels=cfg[net_name][-2],
                                   in_channels=self.num_class,
                                   kernel_size=(1, 1),
                                   strides=(1, 1),
                                   use_bias=False),
                Reshape(shape=(self.num_class, 1, 1))
            ]
            with self.name_scope():
                self.drm_cp = nn.HybridSequential(prefix='drmcp_')
                for block in layers_drm_cp[::-1]:
                    self.drm_cp.add(block)
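The Reshape block used by both decoder paths above is defined elsewhere in the original project; a minimal sketch consistent with how it is called here (reshaping the class vector to (batch, num_class, 1, 1)) could look like this:
from mxnet.gluon import nn

class Reshape(nn.HybridBlock):
    """Reshape the input to (batch,) + shape, keeping the batch axis."""
    def __init__(self, shape, **kwargs):
        super(Reshape, self).__init__(**kwargs)
        self._shape = tuple(shape)

    def hybrid_forward(self, F, x):
        # a 0 in the target shape copies the corresponding input dimension (the batch axis)
        return F.reshape(x, shape=(0,) + self._shape)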
Example 27
import sys

sys.path.append('..')
import gluonbook as gb
from mxnet import autograd, gluon, init, nd
from mxnet.gluon import loss as gloss, nn

batch_size = 256
train_iter, test_iter = gb.load_data_fashion_mnist(batch_size)
# initialize the model
net = nn.Sequential()  # the model is a simple fully-connected net
net.add(nn.Flatten())  # flatten each image into a vector
net.add(nn.Dense(10))  # output layer with 10 units
net.initialize(init.Normal(sigma=0.01))
# define the loss function
loss = gloss.SoftmaxCrossEntropyLoss()
# define the optimization algorithm
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.1})

num_epochs = 5
gb.train_cpu(net, train_iter, test_iter, loss, num_epochs, batch_size, None,
             None, trainer)
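If gluonbook's train_cpu helper is unavailable, the same training can be written as a plain Gluon loop using only the objects defined above (a sketch):
for epoch in range(num_epochs):
    train_loss, train_acc, n = 0.0, 0.0, 0
    for X, y in train_iter:
        with autograd.record():
            y_hat = net(X)
            l = loss(y_hat, y)
        l.backward()
        trainer.step(batch_size)
        train_loss += l.sum().asscalar()
        train_acc += (y_hat.argmax(axis=1) == y.astype('float32')).sum().asscalar()
        n += y.size
    print('epoch %d, loss %.4f, train acc %.3f'
          % (epoch + 1, train_loss / n, train_acc / n))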
Example 28
    def __init__(self, num_classes=1001):
        super(NASNetALarge, self).__init__()
        self.num_classes = num_classes

        self.conv0 = nn.HybridSequential()
        self.conv0.add(nn.Conv2D(channels=96, kernel_size=3, padding=0, strides=1, use_bias=False))
        self.conv0.add(nn.BatchNorm(epsilon=0.001, momentum=0.1))

        self.cell_stem_0 = CellStem0()
        self.cell_stem_1 = CellStem1()

        self.cell_0 = FirstCell(in_channels_left=168, out_channels_left=84,
                                in_channels_right=336, out_channels_right=168)
        self.cell_1 = NormalCell(in_channels_left=336, out_channels_left=168,
                                 in_channels_right=1008, out_channels_right=168)
        self.cell_2 = NormalCell(in_channels_left=1008, out_channels_left=168,
                                 in_channels_right=1008, out_channels_right=168)
        self.cell_3 = NormalCell(in_channels_left=1008, out_channels_left=168,
                                 in_channels_right=1008, out_channels_right=168)
        self.cell_4 = NormalCell(in_channels_left=1008, out_channels_left=168,
                                 in_channels_right=1008, out_channels_right=168)
        self.cell_5 = NormalCell(in_channels_left=1008, out_channels_left=168,
                                 in_channels_right=1008, out_channels_right=168)

        self.reduction_cell_0 = ReductionCell0(in_channels_left=1008, out_channels_left=336,
                                               in_channels_right=1008, out_channels_right=336)

        self.cell_6 = FirstCell(in_channels_left=1008, out_channels_left=168,
                                in_channels_right=1344, out_channels_right=336)
        self.cell_7 = NormalCell(in_channels_left=1344, out_channels_left=336,
                                 in_channels_right=2016, out_channels_right=336)
        self.cell_8 = NormalCell(in_channels_left=2016, out_channels_left=336,
                                 in_channels_right=2016, out_channels_right=336)
        self.cell_9 = NormalCell(in_channels_left=2016, out_channels_left=336,
                                 in_channels_right=2016, out_channels_right=336)
        self.cell_10 = NormalCell(in_channels_left=2016, out_channels_left=336,
                                  in_channels_right=2016, out_channels_right=336)
        self.cell_11 = NormalCell(in_channels_left=2016, out_channels_left=336,
                                  in_channels_right=2016, out_channels_right=336)

        self.reduction_cell_1 = ReductionCell1(in_channels_left=2016, out_channels_left=672,
                                               in_channels_right=2016, out_channels_right=672)

        self.cell_12 = FirstCell(in_channels_left=2016, out_channels_left=336,
                                 in_channels_right=2688, out_channels_right=672)
        self.cell_13 = NormalCell(in_channels_left=2688, out_channels_left=672,
                                  in_channels_right=4032, out_channels_right=672)
        self.cell_14 = NormalCell(in_channels_left=4032, out_channels_left=672,
                                  in_channels_right=4032, out_channels_right=672)
        self.cell_15 = NormalCell(in_channels_left=4032, out_channels_left=672,
                                  in_channels_right=4032, out_channels_right=672)
        self.cell_16 = NormalCell(in_channels_left=4032, out_channels_left=672,
                                  in_channels_right=4032, out_channels_right=672)
        self.cell_17 = NormalCell(in_channels_left=4032, out_channels_left=672,
                                  in_channels_right=4032, out_channels_right=672)

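        # NASNet-A Large chains these cells pairwise: each cell receives the outputs of
        # the two preceding cells, and the 4032-channel output of cell_17 feeds the
        # relu / avgpool / flatten / dropout / dense head below. The wiring itself lives
        # in hybrid_forward, which is not shown in this snippet.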
        self.relu = nn.Activation(activation='relu')
        self.avgpool = nn.AvgPool2D(pool_size=11, strides=1, padding=0)
        self.flatten = nn.Flatten()
        self.dropout = nn.Dropout(0.5)
        self.dense = nn.Dense(num_classes)
Example 29
def generate_lookup_table(use_se, last_conv_after_pooling, channels_layout, nas_root):
    stage_repeats = [4, 4, 8, 4]
    if channels_layout == 'OneShot':
        stage_out_channels = [64, 160, 320, 640]
    elif channels_layout == 'ShuffleNetV2+':
        stage_out_channels = [48, 128, 256, 512]
    else:
        raise ValueError('Unrecognized channel layout')
    channel_scales = [0.2, 0.4, 0.6, 0.8, 1.0, 1.2, 1.4, 1.6, 1.8, 2.0]
    first_conv_out_channel = 16
    input_size = 224
    last_conv_out_channel = 1024
    input_data = nd.ones((1, 3, input_size, input_size))
    bar = Bar(max_step=sum(stage_repeats) + 2, name='Building lookup table')
    bar.start()

    lookup_table = dict()
    lookup_table['config'] = dict()
    lookup_table['config']['use_se'] = use_se
    lookup_table['config']['last_conv_after_pooling'] = last_conv_after_pooling
    lookup_table['config']['channels_layout'] = channels_layout
    lookup_table['config']['stage_repeats'] = stage_repeats
    lookup_table['config']['stage_out_channels'] = stage_out_channels
    lookup_table['config']['channel_scales'] = channel_scales
    lookup_table['config']['first_conv_out_channel'] = first_conv_out_channel
    lookup_table['config']['input_size'] = input_size
    lookup_table['config']['last_conv_out_channel'] = last_conv_out_channel

    # input block
    bar.step()
    input_block = nn.HybridSequential()
    input_block.add(
        nn.Conv2D(first_conv_out_channel, in_channels=3, kernel_size=3, strides=2,
                  padding=1, use_bias=False, prefix='first_conv_'),
        nn.BatchNorm(momentum=0.1),
        Activation('hard_swish' if use_se else 'relu')
    )
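    # get_block_flop (defined elsewhere in this repository) is assumed to return a
    # (flops, parameter count, output ndarray) triple for a block applied to its input.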
    input_block_flops, input_block_model_size, input_data = get_block_flop(input_block, input_data)
    lookup_table['flops'] = dict()
    lookup_table['params'] = dict()
    lookup_table['flops']['input_block'] = input_block_flops
    lookup_table['params']['input_block'] = input_block_model_size

    # mid blocks
    lookup_table['flops']['nas_block'] = []  # 20 x 4 x 10, num_of_blocks x num_of_block_choices x num_of_channel_scales
    lookup_table['params']['nas_block'] = []
    input_channel = first_conv_out_channel
    for stage_id in range(len(stage_repeats)):
        numrepeat = stage_repeats[stage_id]
        output_channel = stage_out_channels[stage_id]

        if use_se:
            act_name = 'hard_swish' if stage_id >= 1 else 'relu'
            block_use_se = stage_id >= 2
        else:
            act_name = 'relu'
            block_use_se = False
        # create repeated blocks for current stage
        for i in range(numrepeat):
            bar.step()
            stride = 2 if i == 0 else 1
            output_data = None
            block_flops = [[0] * len(channel_scales) for _ in range(4)]
            block_params = [[0] * len(channel_scales) for _ in range(4)]
            for scale_i, scale in enumerate(channel_scales):
                # TODO: change back to make_divisible
                # mid_channel = make_divisible(int(output_channel // 2 * channel_scales[block_id]))
                mid_channel = int(output_channel // 2 * scale)
                # SNB 3x3
                snb3 = ShuffleNetBlock(input_channel, output_channel, mid_channel,
                                       block_mode='ShuffleNetV2', ksize=3, stride=stride,
                                       use_se=block_use_se, act_name=act_name)
                snb3_block_flops, snb3_block_model_size, _ = get_block_flop(snb3, input_data)
                # SNB 5x5
                snb5 = ShuffleNetBlock(input_channel, output_channel, mid_channel,
                                       block_mode='ShuffleNetV2', ksize=5, stride=stride,
                                       use_se=block_use_se, act_name=act_name)
                snb5_block_flops, snb5_block_model_size, _ = get_block_flop(snb5, input_data)
                # SNB 7x7
                snb7 = ShuffleNetBlock(input_channel, output_channel, mid_channel,
                                       block_mode='ShuffleNetV2', ksize=7, stride=stride,
                                       use_se=block_use_se, act_name=act_name)
                snb7_block_flops, snb7_block_model_size, _ = get_block_flop(snb7, input_data)
                # SXB 3x3
                sxb3 = ShuffleNetBlock(input_channel, output_channel, mid_channel,
                                       block_mode='ShuffleXception', ksize=3, stride=stride,
                                       use_se=block_use_se, act_name=act_name)
                sxb3_block_flops, sxb3_block_model_size, output_data = get_block_flop(sxb3, input_data)
                # fill the table
                block_flops[0][scale_i] = snb3_block_flops
                block_params[0][scale_i] = snb3_block_model_size
                block_flops[1][scale_i] = snb5_block_flops
                block_params[1][scale_i] = snb5_block_model_size
                block_flops[2][scale_i] = snb7_block_flops
                block_params[2][scale_i] = snb7_block_model_size
                block_flops[3][scale_i] = sxb3_block_flops
                block_params[3][scale_i] = sxb3_block_model_size

            lookup_table['flops']['nas_block'].append(block_flops)
            lookup_table['params']['nas_block'].append(block_params)
            input_data = output_data
            input_channel = output_channel

    # output block
    bar.step()
    output_block = nn.HybridSequential()
    if last_conv_after_pooling:
        # MobileNet V3 approach
        output_block.add(
            nn.GlobalAvgPool2D(),
            # no last SE for MobileNet V3 style
            nn.Conv2D(last_conv_out_channel, kernel_size=1, strides=1,
                      padding=0, use_bias=True, prefix='conv_fc_'),
            # No bn for the conv after pooling
            Activation('hard_swish' if use_se else 'relu')
        )
    else:
        if use_se:
            # ShuffleNetV2+ approach
            output_block.add(
                nn.Conv2D(make_divisible(last_conv_out_channel * 0.75), in_channels=input_channel,
                          kernel_size=1, strides=1,
                          padding=0, use_bias=False, prefix='last_conv_'),
                nn.BatchNorm(momentum=0.1),
                Activation('hard_swish' if use_se else 'relu'),
                nn.GlobalAvgPool2D(),
                SE(make_divisible(last_conv_out_channel * 0.75)),
                nn.Conv2D(last_conv_out_channel, in_channels=make_divisible(last_conv_out_channel * 0.75),
                          kernel_size=1, strides=1,
                          padding=0, use_bias=True, prefix='conv_fc_'),
                # No bn for the conv after pooling
                Activation('hard_swish' if use_se else 'relu')
            )
        else:
            # original Oneshot Nas approach
            output_block.add(
                nn.Conv2D(last_conv_out_channel, in_channels=input_channel, kernel_size=1, strides=1,
                          padding=0, use_bias=False, prefix='last_conv_'),
                nn.BatchNorm(momentum=0.1),
                Activation('hard_swish' if use_se else 'relu'),
                nn.GlobalAvgPool2D()
            )

    # Dropout ratio follows ShuffleNetV2+ for se
    output_block.add(
        nn.Dropout(0.2 if use_se else 0.1),
        nn.Conv2D(1000, in_channels=last_conv_out_channel, kernel_size=1, strides=1,
                  padding=0, use_bias=True),
        nn.Flatten()
    )
    output_block_flops, output_block_model_size, output_data = get_block_flop(output_block, input_data)
    lookup_table['flops']['output_block'] = output_block_flops
    lookup_table['params']['output_block'] = output_block_model_size

    pp = pprint.PrettyPrinter(indent=4)
    pp.pprint(lookup_table)

    json_file = os.path.join(nas_root, 'models/lookup_table')
    if use_se:
        json_file += '_se'
    if last_conv_after_pooling:
        json_file += '_lastConvAfterPooling'
    json_file += '_' + channels_layout + '.json'
    with open(json_file, 'w') as fp:
        json.dump(lookup_table, fp, indent=4)
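A typical invocation of the function above (a sketch; nas_root must point at the repository root so that models/lookup_table*.json can be written):
generate_lookup_table(use_se=True,
                      last_conv_after_pooling=True,
                      channels_layout='OneShot',
                      nas_root='.')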
Example 30
    def __init__(self,
                 cfg,
                 cls_ch_squeeze,
                 cls_ch_expand,
                 multiplier=1.,
                 classes=1000,
                 norm_kwargs=None,
                 last_gamma=False,
                 final_drop=0.,
                 use_global_stats=False,
                 name_prefix='',
                 norm_layer=BatchNorm):
        super(_MobileNetV3, self).__init__(prefix=name_prefix)
        norm_kwargs = norm_kwargs if norm_kwargs is not None else {}
        if use_global_stats:
            norm_kwargs['use_global_stats'] = True
        # initialize residual networks
        k = multiplier
        self.last_gamma = last_gamma
        self.norm_kwargs = norm_kwargs
        self.inplanes = 16

        with self.name_scope():
            self.features = nn.HybridSequential(prefix='')
            self.features.add(nn.Conv2D(channels=make_divisible(k * self.inplanes),
                                        kernel_size=3, padding=1, strides=2,
                                        use_bias=False, prefix='first-3x3-conv-conv2d_'))
            self.features.add(norm_layer(prefix='first-3x3-conv-batchnorm_'))
            self.features.add(HardSwish())
            for i, layer_cfg in enumerate(cfg):
                layer = self._make_layer(
                    kernel_size=layer_cfg[0],
                    exp_ch=make_divisible(k * layer_cfg[1]),
                    out_channel=make_divisible(k * layer_cfg[2]),
                    use_se=layer_cfg[3],
                    act_func=layer_cfg[4],
                    stride=layer_cfg[5],
                    prefix='seq-%d' % i,
                )
                self.features.add(layer)
            self.features.add(nn.Conv2D(channels=make_divisible(k * cls_ch_squeeze),
                                        kernel_size=1, padding=0, strides=1,
                                        use_bias=False, prefix='last-1x1-conv1-conv2d_'))
            self.features.add(
                norm_layer(prefix='last-1x1-conv1-batchnorm_', **norm_kwargs))
            self.features.add(HardSwish())
            self.features.add(nn.GlobalAvgPool2D())
            self.features.add(
                nn.Conv2D(channels=cls_ch_expand,
                          kernel_size=1,
                          padding=0,
                          strides=1,
                          use_bias=False,
                          prefix='last-1x1-conv2-conv2d_'))
            self.features.add(HardSwish())

            if final_drop > 0:
                self.features.add(nn.Dropout(final_drop))
            self.output = nn.HybridSequential(prefix='output_')
            with self.output.name_scope():
                self.output.add(
                    nn.Conv2D(in_channels=cls_ch_expand,
                              channels=classes,
                              kernel_size=1,
                              prefix='fc_'), nn.Flatten())