def __init__(self, units, num_stage, filter_list, ratio_list, num_class,
             num_group, data_type, drop_out, bn_mom=0.9, **kwargs):
    super(se_resnext, self).__init__(**kwargs)
    num_unit = len(units)
    assert num_unit == num_stage
    self.conv0 = nn.Conv2D(in_channels=3, channels=filter_list[0],
                           kernel_size=(7, 7), strides=(2, 2), padding=(3, 3),
                           use_bias=False, prefix='conv0_')
    self.bn0 = nn.BatchNorm(in_channels=filter_list[0], epsilon=2e-5,
                            momentum=bn_mom, prefix='batchnorm0_')
    self.relu0 = nn.Activation(activation='relu', prefix='relu0_')
    self.relu0min = NReLu(prefix='relu0min_')
    self.pool0 = nn.MaxPool2D(pool_size=(3, 3), strides=(2, 2),
                              padding=(1, 1), prefix='pool0_')
    self.residual_stages = nn.HybridSequential(prefix='residual_')
    for i in range(num_stage):
        # first unit of each stage projects the shortcut; stages after the
        # first also downsample
        self.residual_stages.add(
            residual_unit(in_channels=filter_list[i],
                          num_filter=filter_list[i + 1],
                          ratio=ratio_list[2],
                          strides=(1 if i == 0 else 2, 1 if i == 0 else 2),
                          dim_match=False,
                          name='stage%d_unit%d' % (i + 1, 1),
                          num_group=num_group, bn_mom=bn_mom,
                          prefix='stage%d_unit%d_' % (i + 1, 1)))
        for j in range(units[i] - 1):
            self.residual_stages.add(
                residual_unit(in_channels=filter_list[i + 1],
                              num_filter=filter_list[i + 1],
                              ratio=ratio_list[2], strides=(1, 1),
                              dim_match=True,
                              name='stage%d_unit%d' % (i + 1, j + 2),
                              num_group=num_group, bn_mom=bn_mom,
                              prefix='stage%d_unit%d_' % (i + 1, j + 2)))
    self.pool1 = nn.GlobalAvgPool2D(prefix='pool1_')
    self.flatten1 = nn.Flatten(prefix='flatten1_')
    self.drop1 = nn.Dropout(rate=drop_out, prefix='dp1_')
    self.fc = nn.Dense(units=num_class, in_units=filter_list[-1],
                       prefix='dense_')
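Since only the constructor is shown, here is a minimal, hedged sketch of a matching forward pass. The chaining order (stem, residual stages, pooling, dropout, classifier) is an assumption inferred from the attribute names, not the original method.

# Hedged sketch: a plausible hybrid_forward for the se_resnext block above,
# assuming the layers defined in __init__ are applied in the usual order.
def hybrid_forward(self, F, x):
    x = self.relu0(self.bn0(self.conv0(x)))  # 7x7 stem
    x = self.pool0(x)                        # 3x3 max pool, stride 2
    x = self.residual_stages(x)              # stacked SE-ResNeXt units
    x = self.flatten1(self.pool1(x))         # global average pool -> vector
    x = self.drop1(x)
    return self.fc(x)                        # class logits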
    for (num_convs, channels) in architecture:
        out.add(vgg_block(num_convs, channels))
    return out

###############################################################
# model and params
num_outputs = 10
architecture = ((2, 64), (2, 128), (4, 256), (4, 512), (4, 512))
net = nn.HybridSequential()
# add name_scope on the outermost Sequential
# 8 conv layers + 3 dense layers = VGG 11
# 13 conv layers + 3 dense layers = VGG 16
# 16 conv layers + 3 dense layers = VGG 19
with net.name_scope():
    net.add(vgg_stack(architecture),
            nn.Flatten(),
            nn.Dense(4096, activation="relu"),
            nn.Dropout(.5),
            nn.Dense(4096, activation="relu"),
            nn.Dropout(.5),
            nn.Dense(num_outputs))

###############################################################
############### graph ###############
import gluoncv
gluoncv.utils.viz.plot_network(net, shape=(64, 3, 224, 224))
#####################################

##### optimization #####
net.collect_params().initialize(mx.init.Xavier(), ctx=ctx)
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.01})
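For completeness, a hedged sketch of one training step with the VGG net above; the dummy batch, label values, and loss choice are illustrative assumptions (the original script only sets up `net`, `trainer`, and `ctx`).

# Hedged sketch: one SGD step on a dummy batch, assuming `net`, `trainer`,
# `gluon`, and `ctx` are set up as above.
from mxnet import nd, autograd

X = nd.random.uniform(shape=(4, 3, 224, 224), ctx=ctx)  # dummy images
y = nd.array([0, 1, 2, 3], ctx=ctx)                     # dummy labels
loss_fn = gluon.loss.SoftmaxCrossEntropyLoss()
with autograd.record():
    out = net(X)
    loss = loss_fn(out, y)
loss.backward()
trainer.step(batch_size=X.shape[0])
print(loss.mean().asscalar())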
def __init__(self, channels, init_block_channels, init_block_kernel_size,
             init_block_padding, rs, bws, incs, groups, b_case, for_training,
             test_time_pool, in_channels=3, in_size=(224, 224), classes=1000,
             **kwargs):
    super(DPN, self).__init__(**kwargs)
    self.in_size = in_size
    self.classes = classes
    with self.name_scope():
        self.features = DualPathSequential(return_two=False,
                                           first_ordinals=1,
                                           last_ordinals=0, prefix="")
        self.features.add(
            DPNInitBlock(in_channels=in_channels,
                         out_channels=init_block_channels,
                         kernel_size=init_block_kernel_size,
                         padding=init_block_padding))
        in_channels = init_block_channels
        for i, channels_per_stage in enumerate(channels):
            stage = DualPathSequential(prefix="stage{}_".format(i + 1))
            r = rs[i]
            bw = bws[i]
            inc = incs[i]
            with stage.name_scope():
                for j, out_channels in enumerate(channels_per_stage):
                    has_proj = (j == 0)
                    key_strides = 2 if (j == 0) and (i != 0) else 1
                    stage.add(
                        DPNUnit(in_channels=in_channels, mid_channels=r,
                                bw=bw, inc=inc, groups=groups,
                                has_proj=has_proj, key_strides=key_strides,
                                b_case=b_case))
                    in_channels = out_channels
            self.features.add(stage)
        self.features.add(DPNFinalBlock(channels=in_channels))
        self.output = nn.HybridSequential(prefix="")
        if for_training or not test_time_pool:
            self.output.add(nn.GlobalAvgPool2D())
            self.output.add(conv1x1(in_channels=in_channels,
                                    out_channels=classes, use_bias=True))
            self.output.add(nn.Flatten())
        else:
            # test-time pooling: average pool, 1x1 conv classifier, then
            # combined global avg/max pooling
            self.output.add(nn.AvgPool2D(pool_size=7, strides=1))
            self.output.add(conv1x1(in_channels=in_channels,
                                    out_channels=classes, use_bias=True))
            self.output.add(GlobalAvgMaxPool2D())
            self.output.add(nn.Flatten())
def __init__(self, block, layers, classes=1000, dilated=False,
             norm_layer=BatchNorm, last_gamma=False, **kwargs):
    self.inplanes = 64
    super(ResNetV1b, self).__init__()
    with self.name_scope():
        self.conv1 = nn.Conv2D(in_channels=3, channels=64, kernel_size=7,
                               strides=2, padding=3, use_bias=False)
        self.bn1 = norm_layer(in_channels=64)
        self.relu = nn.Activation('relu')
        self.maxpool = nn.MaxPool2D(pool_size=3, strides=2, padding=1)
        self.layer1 = self._make_layer(1, block, 64, layers[0],
                                       norm_layer=norm_layer,
                                       last_gamma=last_gamma)
        self.layer2 = self._make_layer(2, block, 128, layers[1], strides=2,
                                       norm_layer=norm_layer,
                                       last_gamma=last_gamma)
        if dilated:
            # keep stride 1 and dilate instead, preserving spatial resolution
            self.layer3 = self._make_layer(3, block, 256, layers[2],
                                           strides=1, dilation=2,
                                           norm_layer=norm_layer,
                                           last_gamma=last_gamma)
            self.layer4 = self._make_layer(4, block, 512, layers[3],
                                           strides=1, dilation=4,
                                           norm_layer=norm_layer,
                                           last_gamma=last_gamma)
        else:
            self.layer3 = self._make_layer(3, block, 256, layers[2],
                                           strides=2, norm_layer=norm_layer,
                                           last_gamma=last_gamma)
            self.layer4 = self._make_layer(4, block, 512, layers[3],
                                           strides=2, norm_layer=norm_layer,
                                           last_gamma=last_gamma)
        self.avgpool = nn.AvgPool2D(7)
        self.flat = nn.Flatten()
        self.fc = nn.Dense(in_units=512 * block.expansion, units=classes)
from mxnet import nd
from mxnet import gluon
from mxnet.gluon import nn
from mxnet.gluon.data.vision import datasets, transforms
import matplotlib.pyplot as plt

net = nn.Sequential()
net.add(nn.Conv2D(channels=6, kernel_size=5, activation='relu'),
        nn.MaxPool2D(pool_size=2, strides=2),
        nn.Conv2D(channels=16, kernel_size=3, activation='relu'),
        nn.MaxPool2D(pool_size=2, strides=2),
        nn.Flatten(),
        nn.Dense(120, activation="relu"),
        nn.Dense(84, activation="relu"),
        nn.Dense(10))
net.load_parameters('net.params')

transformer = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize(0.13, 0.31)])

mnist_valid = datasets.FashionMNIST(train=False)
X, y = mnist_valid[:6]
preds = []
for x in X:
    x = transformer(x).expand_dims(axis=0)
    pred = net(x).argmax(axis=1)
    preds.append(pred.astype('int32').asscalar())

_, figs = plt.subplots(1, 6, figsize=(15, 15))
# standard FashionMNIST class names
text_labels = [
    't-shirt', 'trouser', 'pullover', 'dress', 'coat', 'sandal', 'shirt',
    'sneaker', 'bag', 'ankle boot'
]
def main(args):
    if args.gpu == -1:
        ctx = mx.cpu()
    else:
        ctx = mx.gpu(args.gpu)
    with ctx:
        batch_size = args.batch_size
        if (args.dataset == 'mnist') or (args.dataset == 'fmnist'):
            num_inputs = 28 * 28
            num_outputs = 10
        elif args.dataset == 'chmnist':
            num_inputs = 64 * 64
            num_outputs = 8
        elif args.dataset == 'bcw':
            num_inputs = 30
            num_outputs = 2
        elif args.dataset == 'cifar10':
            num_inputs = 32 * 32 * 3
            num_outputs = 10
        else:
            sys.exit('Not Implemented Dataset!')

        #################################################
        # Multiclass Logistic Regression
        MLR = gluon.nn.Sequential()
        with MLR.name_scope():
            MLR.add(gluon.nn.Dense(num_outputs))

        ################################################################
        def evaluate_accuracy(data_iterator, net):
            acc = mx.metric.Accuracy()
            for i, (data, label) in enumerate(data_iterator):
                if args.net == 'mlr':
                    data = data.as_in_context(ctx).reshape((-1, num_inputs))
                    label = label.as_in_context(ctx)
                elif args.net == 'dnn10' and (args.dataset == 'mnist'
                                              or args.dataset == 'fmnist'):
                    data = data.as_in_context(ctx).reshape((-1, 1, 28, 28))
                    label = label.as_in_context(ctx)
                elif args.dataset == 'chmnist':
                    data = data.as_in_context(ctx).reshape((-1, 1, 64, 64))
                    label = label.as_in_context(ctx)
                elif args.net == 'dnn2':
                    data = data.as_in_context(ctx).reshape(
                        (-1, 1, 1, num_inputs))
                    label = label.as_in_context(ctx)
                elif args.dataset == 'cifar10':
                    data = data.as_in_context(ctx).reshape((-1, 3, 32, 32))
                    label = label.as_in_context(ctx)
                output = net(data)
                predictions = nd.argmax(output, axis=1)
                if args.dataset == 'chmnist':
                    predictions = predictions.reshape(-1, 1)
                acc.update(preds=predictions, labels=label)
            return acc.get()[1]

        ################################################################
        # decide attack type
        if args.byz_type == 'partial_trim':
            # partial knowledge trim attack
            byz = byzantine.partial_trim
        elif args.byz_type == 'full_trim':
            # full knowledge trim attack
            byz = byzantine.full_trim
        elif args.byz_type == 'full_krum':
            byz = byzantine.full_krum
        elif args.byz_type == 'no':
            byz = byzantine.no_byz
        else:
            sys.exit('Not Implemented Attack!')

        # decide model architecture
        if args.net == 'mlr':
            net = MLR
            net.collect_params().initialize(mx.init.Xavier(magnitude=1.),
                                            force_reinit=True, ctx=ctx)
        elif args.net == 'dnn10':
            net = nn.Sequential()
            net.add(nn.Conv2D(channels=30, kernel_size=3, activation='relu'),
                    nn.MaxPool2D(pool_size=2, strides=2),
                    nn.Conv2D(channels=50, kernel_size=3, activation='relu'),
                    nn.MaxPool2D(pool_size=2, strides=2),
                    nn.Flatten(),
                    nn.Dense(200, activation='relu'),
                    nn.Dense(10))
            net.collect_params().initialize(mx.init.Xavier(magnitude=1.),
                                            force_reinit=True, ctx=ctx)
        elif args.net == 'dnn2':
            net = nn.Sequential()
            net.add(nn.Conv2D(channels=30, kernel_size=1, activation='relu'),
                    nn.MaxPool2D(pool_size=1, strides=1),
                    nn.Conv2D(channels=50, kernel_size=1, activation='relu'),
                    nn.MaxPool2D(pool_size=1, strides=1),
                    nn.Flatten(),
                    nn.Dense(200, activation='relu'),
                    nn.Dense(2))
            net.initialize(init=init.Xavier(), ctx=ctx)
            # net.collect_params().initialize(mx.init.Xavier(magnitude=1.),
            #                                 force_reinit=True, ctx=ctx)
        elif args.net == 'resnet20':
            net = get_model('cifar_resnet20_v1', pretrained=False, classes=8,
                            ctx=ctx)
            net.collect_params().initialize(mx.init.Xavier(), ctx=ctx)
        else:
            sys.exit('Not Implemented model architecture!')

        # define loss
        softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

        # set up parameters
        num_workers = args.nworkers
        lr = args.lr
        epochs = args.nepochs
        cmax = args.cmax
        dec = args.decay
        grad_list = []
        train_acc_list = []

        # generate a string indicating the parameters
        paraString = str(args.byz_type) + "_" + str(args.aggregation) + "_" + \
            str(args.dataset) + "_" + str(args.net) + "_lr_" + str(args.lr) + \
            "_bias_" + str(args.bias) + "_m_" + str(args.nworkers) + "_c_" + \
            str(args.nbyz) + "_cmax_" + str(args.cmax) + "_d_" + \
            str(args.decay) + "_batch_" + str(args.batch_size) + \
            "_epochs_" + str(args.nepochs) + "_"

        # set up seed
        seed = args.seed
        mx.random.seed(seed)
        random.seed(seed)
        np.random.seed(seed)

        # load dataset
        if args.dataset == 'mnist':
            def transform(data, label):
                return data.astype(np.float32) / 255, label.astype(np.float32)
            test_data = mx.gluon.data.DataLoader(
                mx.gluon.data.vision.datasets.MNIST(train=False,
                                                    transform=transform),
                500, shuffle=False, last_batch='rollover')
            train_data = mx.gluon.data.DataLoader(
                mx.gluon.data.vision.datasets.MNIST(train=True,
                                                    transform=transform),
                60000, shuffle=True, last_batch='rollover')
        elif args.dataset == 'cifar10':
            transform_train = transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize([0.4914, 0.4822, 0.4465],
                                     [0.2023, 0.1994, 0.2010])
            ])
            transform_test = transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize([0.4914, 0.4822, 0.4465],
                                     [0.2023, 0.1994, 0.2010])
            ])
            test_data = gluon.data.DataLoader(
                gluon.data.vision.CIFAR10(
                    train=False).transform_first(transform_test),
                batch_size=32, shuffle=False)
            train_data = gluon.data.DataLoader(
                gluon.data.vision.CIFAR10(
                    train=True).transform_first(transform_train),
                batch_size=32, shuffle=True, last_batch='discard')
        elif args.dataset == 'fmnist':
            def transform(data, label):
                return data.astype(np.float32) / 255, label.astype(np.float32)
            test_data = mx.gluon.data.DataLoader(
                mx.gluon.data.vision.datasets.FashionMNIST(
                    train=False, transform=transform),
                500, shuffle=False, last_batch='rollover')
            train_data = mx.gluon.data.DataLoader(
                mx.gluon.data.vision.datasets.FashionMNIST(
                    train=True, transform=transform),
                60000, shuffle=True, last_batch='rollover')
        elif args.dataset == 'chmnist':
            chdata = genfromtxt('chmnist64_shuffled.csv', delimiter=',')
            train_data_ = chdata[1:4001]
            test_data_ = chdata[4001:]
            train_data = mx.gluon.data.DataLoader(
                mx.gluon.data.dataset.ArrayDataset(
                    train_data_[:, 1:-1].astype(np.float32) / 255,
                    train_data_[:, -1:].astype(np.float32) - 1),
                4000, shuffle=False, last_batch='rollover')
            test_data = mx.gluon.data.DataLoader(
                mx.gluon.data.dataset.ArrayDataset(
                    test_data_[:, 1:-1].astype(np.float32) / 255,
                    test_data_[:, -1:].astype(np.float32) - 1),
                1000, shuffle=True, last_batch='rollover')
        elif args.dataset == 'bcw':
            data = load_breast_cancer()
            df = pd.DataFrame(data.data, columns=data.feature_names)
            y = data.target
            df = (df - df.mean()) / (df.max() - df.min())
            X_train, X_test, y_train, y_test = train_test_split(
                df, y, test_size=0.20, random_state=69)
            train_data = mx.gluon.data.DataLoader(
                mx.gluon.data.dataset.ArrayDataset(
                    X_train.values.astype(np.float32),
                    y_train.astype(np.float32)),
                455, shuffle=False, last_batch='rollover')
            test_data = mx.gluon.data.DataLoader(
                mx.gluon.data.dataset.ArrayDataset(
                    X_test.values.astype(np.float32),
                    y_test.astype(np.float32)),
                114, shuffle=True, last_batch='rollover')
        else:
            sys.exit('Not Implemented dataset!')

        # biased assignment
        bias_weight = args.bias
        other_group_size = (1 - bias_weight) / (num_outputs - 1)
        worker_per_group = num_workers / num_outputs

        # assign non-IID training data to each worker
        each_worker_data = [[] for _ in range(num_workers)]
        each_worker_label = [[] for _ in range(num_workers)]
        counter = 0
        server_data = mx.nd.empty((100, 1, 28, 28))
        server_label = mx.nd.empty(100)
        for _, (data, label) in enumerate(train_data):
            for (x, y) in zip(data, label):
                if (args.dataset == 'mnist' or args.dataset == 'fmnist') \
                        and args.net == 'mlr':
                    x = x.as_in_context(ctx).reshape(-1, num_inputs)
                if (args.dataset == 'mnist' or args.dataset == 'fmnist') \
                        and args.net == 'dnn10':
                    x = x.as_in_context(ctx).reshape(-1, 1, 28, 28)
                if args.dataset == 'chmnist':
                    x = x.as_in_context(ctx).reshape(-1, 1, 64, 64)
                if args.dataset == 'bcw':
                    x = x.as_in_context(ctx).reshape(-1, 1, 1, 30)
                if args.dataset == 'cifar10':
                    x = x.as_in_context(ctx).reshape(-1, 3, 32, 32)
                y = y.as_in_context(ctx)

                # assign a data point to a group
                upper_bound = (y.asnumpy()) * (1 - bias_weight) / \
                    (num_outputs - 1) + bias_weight
                lower_bound = (y.asnumpy()) * (1 - bias_weight) / \
                    (num_outputs - 1)
                rd = np.random.random_sample()
                if rd > upper_bound:
                    worker_group = int(
                        np.floor((rd - upper_bound) / other_group_size)
                        + y.asnumpy() + 1)
                elif rd < lower_bound:
                    worker_group = int(np.floor(rd / other_group_size))
                else:
                    worker_group = y.asnumpy()

                # assign a data point to a worker
                rd = np.random.random_sample()
                selected_worker = int(worker_group * worker_per_group
                                      + int(np.floor(rd * worker_per_group)))
                each_worker_data[selected_worker].append(x)
                each_worker_label[selected_worker].append(y)
                if args.aggregation == 'fltrust':
                    # keep the first 100 points as the server's root dataset
                    if counter < 100:
                        server_data[counter] = x.reshape((1, 28, 28))
                        server_label[counter] = y
                        counter += 1

        # concatenate the data for each worker
        each_worker_data = [nd.concat(*each_worker, dim=0)
                            for each_worker in each_worker_data]
        each_worker_label = [nd.concat(*each_worker, dim=0)
                             for each_worker in each_worker_label]
        # pdb.set_trace()

        # random shuffle the workers
        random_order = np.random.RandomState(
            seed=seed).permutation(num_workers)
        each_worker_data = [each_worker_data[i] for i in random_order]
        each_worker_label = [each_worker_label[i] for i in random_order]

        P = 0
        if args.net == 'mlr' and (args.dataset == 'mnist'
                                  or args.dataset == 'fmnist'):
            shape = (1, 784)
        elif args.net == 'dnn10' and (args.dataset == 'mnist'
                                      or args.dataset == 'fmnist'):
            shape = (1, 1, 28, 28)
        elif args.dataset == 'chmnist':
            shape = (1, 1, 64, 64)
        elif args.dataset == 'bcw':
            shape = (1, 1, 1, 30)
        elif args.dataset == 'cifar10':
            shape = (1, 3, 32, 32)
        dummy_output = net(mx.nd.zeros(shape))

        # count the total number of parameters in the network
        for param in net.collect_params().values():
            if param.grad_req != 'null':
                P = P + len(param.grad().reshape(-1))
        # pdb.set_trace()

        if (args.aggregation == 'EULtrim') or (args.aggregation == 'EULkrum') \
                or (args.aggregation == 'EULmedian'):
            if args.dataset == 'mnist':
                valid_dataset = mx.gluon.data.vision.datasets.MNIST(
                    train=False, transform=transform)
            if args.dataset == 'fmnist':
                valid_dataset = mx.gluon.data.vision.datasets.FashionMNIST(
                    train=False, transform=transform)
            sampled = np.random.choice(10000, 100)
            valid_array = mx.gluon.data.dataset.ArrayDataset(
                valid_dataset[sampled[:]][0], valid_dataset[sampled[:]][1])
            valid_data = mx.gluon.data.DataLoader(valid_array, 100,
                                                  shuffle=True)
            del valid_dataset
            del valid_array

        direction = mx.nd.zeros(P)  # current direction of the global model
        flip_vector = np.empty(epochs)  # flip score of the global model
        local_flip_vector = np.zeros(
            (epochs, num_workers))  # flip scores of all local models
        local_flip_new = np.zeros((epochs, num_workers))
        active = np.arange(num_workers)  # used in the earlier version to know
                                         # which clients aren't blacklisted yet
        blacklist = np.zeros(num_workers)
        susp = nd.zeros(num_workers)  # suspicion score of all clients
        test_acc = np.empty(epochs)
        corrected = epochs  # in which epoch were cmax clients removed
        flag_corrected = 1
        max_flip = 1.0  # used for the whitebox adaptive attack
        client_list = np.ones(
            (epochs, num_workers)) * (-1)  # clients chosen by EUL/FABA etc.

        # begin training
        for e in range(epochs):
            # print(lr)
            # if (e == 200): lr = lr / 2
            # if (e == 400): lr = lr / 2
            if args.aggregation == 'fltrust':
                with autograd.record():
                    output = net(server_data)
                    loss = softmax_cross_entropy(output, server_label)
                loss.backward()
                server_params = [param.grad().copy()
                                 for param in net.collect_params().values()
                                 if param.grad_req != 'null']
            for i in range(num_workers):
                if blacklist[i] == 0:
                    # sample a batch
                    minibatch = np.random.choice(
                        list(range(each_worker_data[i].shape[0])),
                        size=batch_size, replace=False)
                    # forward
                    with autograd.record():
                        output = net(each_worker_data[i][minibatch])
                        loss = softmax_cross_entropy(
                            output, each_worker_label[i][minibatch])
                    # backward
                    loss.backward()
                    grad_list.append([
                        param.grad().copy()
                        for param in net.collect_params().values()
                        if param.grad_req != 'null'])
            if cmax > 0:
                flag_corrected = 1
            susp = susp / dec
            # lr = get_lr(args.lr, e, epochs)
            if args.aggregation == 'trim1':
                # we aggregate the gradients instead of local model weights in
                # this demo because for the aggregation rules in our setting,
                # it is equivalent to aggregate either of them
                _, direction, cmax, flip_count, lfs = nd_aggregation.trim1(
                    e, grad_list, net, lr, byz, direction, active, blacklist,
                    susp, args.nbyz, cmax, args.utrg, args.udet, args.urem)
                flip_vector[e] = flip_count
                local_flip_vector[e] = lfs.asnumpy()
            if args.aggregation == 'trim':
                # we aggregate the gradients instead of local model weights in
                # this demo because for the aggregation rules in our setting,
                # it is equivalent to aggregate either of them
                _, direction, cmax, flip_count, lfs, lfs_new = \
                    nd_aggregation.trim(
                        e, grad_list, net, lr, byz, direction, active,
                        blacklist, susp, args.nbyz, cmax, args.utrg,
                        args.udet, args.urem)
                flip_vector[e] = flip_count
                local_flip_vector[e] = lfs.asnumpy()
                local_flip_new[e] = lfs_new.asnumpy()
            elif args.aggregation == 'fltrust':
                nd_aggregation.fltrust(e, server_params, grad_list, net, lr,
                                       byz, args.nbyz, active)
            elif args.aggregation == 'foolsgold':
                nd_aggregation.foolsgold(e, grad_list, net, lr, byz,
                                         args.nbyz, active)
            elif args.aggregation == 'krum':
                _, direction, cmax, flip_count, lfs, max_flip = \
                    nd_aggregation.krum(
                        e, grad_list, net, lr, byz, direction, active,
                        blacklist, susp, args.nbyz, cmax, args.utrg,
                        args.udet, args.urem, max_flip)
                flip_vector[e] = flip_count
                local_flip_vector[e] = lfs.asnumpy()
            elif args.aggregation == 'median':
                _, direction, cmax, flip_count, lfs = nd_aggregation.median(
                    e, grad_list, net, lr, byz, direction, active, blacklist,
                    susp, args.nbyz, cmax, args.utrg, args.udet, args.urem)
                flip_vector[e] = flip_count
                local_flip_vector[e] = lfs.asnumpy()
            elif args.aggregation == 'bulyan':
                _, bul_list = nd_aggregation.bulyan(e, grad_list, net, lr,
                                                    byz, args.nbyz)
                client_list[e] = bul_list
            elif args.aggregation == 'faba':
                faba_list = nd_aggregation.faba(e, grad_list, net, lr, byz,
                                                args.nbyz)
                client_list[e] = faba_list
            elif args.aggregation == 'EULtrim':
                _, eul_list = nd_aggregation.EULtrim(
                    e, grad_list, net, lr, byz, valid_data, args.net,
                    args.nbyz, args.gpu)
                client_list[e] = eul_list
            elif args.aggregation == 'EULkrum':
                _, eul_list = nd_aggregation.EULkrum(
                    e, grad_list, net, lr, byz, valid_data, args.net,
                    args.nbyz, args.gpu)
                client_list[e] = eul_list
            elif args.aggregation == 'EULmedian':
                _, eul_list = nd_aggregation.EULmedian(
                    e, grad_list, net, lr, byz, valid_data, args.net,
                    args.nbyz, args.gpu)
                client_list[e] = eul_list
            else:
                sys.exit('Not Implemented aggregation!')
            if cmax == 0 and flag_corrected == 1:
                corrected = e
                flag_corrected = 0

            # free memory, then reset the list
            del grad_list
            grad_list = []

            # compute training accuracy every 10 iterations
            '''
            if (e+1) % 1 == 0:
                pdb.set_trace()
                train_accuracy = evaluate_accuracy(train_data, net)
                train_acc_list.append(train_accuracy)
                print("Epoch %02d. Train_acc %0.4f" % (e, train_accuracy))
                # save the training accuracy every 100 iterations
                if (e+1) % 1 == 0:
                    if (args.dataset == 'mnist' and args.net == 'mlr'):
                        if not os.path.exists('out_mnist_mlr/'):
                            os.mkdir('out_mnist_mlr/')
                        np.savetxt('out_mnist_mlr/' + paraString,
                                   train_acc_list, fmt='%.4f')
                    elif (args.dataset == 'mnist' and args.net == 'cnn'):
                        if not os.path.exists('out_mnist_cnn/'):
                            os.mkdir('out_mnist_cnn/')
                        np.savetxt('out_mnist_cnn/' + paraString,
                                   train_acc_list, fmt='%.4f')
            '''

            # compute the final testing accuracy
            # if (e+1) == args.nepochs:
            test_accuracy = evaluate_accuracy(test_data, net)
            test_acc[e] = test_accuracy
            print("Epoch %02d. Test_acc %0.4f" % (e, test_accuracy))

        filename = args.filename
        myString = args.aggregation + '_' + args.byz_type + '_' + args.net + \
            '_' + args.dataset + '_' + str(args.utrg) + '_' + \
            str(args.udet) + '_' + str(args.urem) + '_' + filename + '_'
        if not os.path.exists('Outputs/'):
            os.mkdir('Outputs/')
        # np.save('Outputs/' + paraString + 'Flip_old.npy', local_flip_new)
        np.save('Outputs/' + paraString + 'Test_acc.npy', test_acc)
        # np.save('Outputs/' + paraString + 'FLip_local_old.npy',
        #         local_flip_vector)
        # np.save('Outputs/' + paraString + 'Reputation_old.npy',
        #         susp.asnumpy())
        net.save_parameters('Outputs/' + paraString + 'net.params')

        ones = nd.ones(num_workers)
        zeros = nd.zeros(num_workers)
        tp = 0
        fp = 0
        tn = 0
        fn = 0
        if (args.aggregation == 'krum' or args.aggregation == 'trim'
                or args.aggregation == 'median'):
            for i in range(epochs):
                sflip = np.argsort(local_flip_vector[i])
                c_removed = len(np.where(local_flip_vector[i] == 0)[0])
                cmax_then = args.cmax - c_removed
                if cmax_then > 0:
                    tp = tp + len(np.where(sflip[-cmax_then:] < args.nbyz)[0])
                    fp = fp + len(np.where(sflip[-cmax_then:] >= args.nbyz)[0])
                    tn = tn + len(
                        np.where(sflip[c_removed:-cmax_then] >= args.nbyz)[0])
                    fn = fn + len(
                        np.where(sflip[c_removed:-cmax_then] < args.nbyz)[0])
                else:
                    tn = tn + len(np.where(sflip[c_removed:] >= args.nbyz)[0])
                    fn = fn + len(np.where(sflip[c_removed:] < args.nbyz)[0])
        if (args.aggregation == 'bulyan' or args.aggregation == 'faba'
                or args.aggregation == 'EULtrim'
                or args.aggregation == 'EULkrum'
                or args.aggregation == 'EULmedian'):
            for i in range(epochs):
                positives = len(np.where(client_list[i] == -1)[0])
                negatives = num_workers - positives
                tn = tn + len(
                    np.where(client_list[i, :negatives] >= args.nbyz)[0])
                fn = fn + len(
                    np.where(client_list[i, :negatives] < args.nbyz)[0])
                tp = tp + args.nbyz - len(
                    np.where(client_list[i, :negatives] < args.nbyz)[0])
                fp = fp + num_workers - args.nbyz - len(
                    np.where(client_list[i, :negatives] >= args.nbyz)[0])
        print(tp, fp, tn, fn, corrected)
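The script ends by printing raw confusion counts. As a hedged, optional post-processing sketch (not part of the original), the counts can be summarized as precision and recall:

# Hedged sketch: summary metrics from the tp/fp/tn/fn counts printed above.
# Guard against empty denominators; variable names follow the script.
precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
print("precision %.4f, recall %.4f" % (precision, recall))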
# coding: utf-8
import gluonbook as gb
from mxnet import gluon, init
from mxnet.gluon import loss as gloss, nn

drop_prob1 = 0.2
drop_prob2 = 0.5

net = nn.Sequential()
net.add(nn.Flatten())
net.add(nn.Dense(256, activation='relu'))
net.add(nn.Dropout(drop_prob1))
net.add(nn.Dense(256, activation='relu'))
net.add(nn.Dropout(drop_prob2))
net.add(nn.Dense(10))
net.initialize(init.Normal(sigma=0.01))

num_epoch = 40
batch_size = 256
train_iter, test_iter = gb.load_data_fashion_mnist(batch_size)
loss = gloss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.5})
gb.train_ch3(net, train_iter, test_iter, loss, num_epoch, batch_size,
             None, None, trainer)
def __init__(self, depth, ctx, pretrained=True, num_features=0,
             num_classes=0):
    super(ResNet, self).__init__()
    self.pretrained = pretrained
    self.num_classes = num_classes
    with self.name_scope():
        # three copies of the backbone; the last stage's stride is set to 1
        model1 = ResNet.__factory[depth](pretrained=pretrained,
                                         ctx=ctx).features[:-1]
        model1[-1][0].body[0]._kwargs['stride'] = (1, 1)
        model1[-1][0].downsample[0]._kwargs['stride'] = (1, 1)
        model2 = ResNet.__factory[depth](pretrained=pretrained,
                                         ctx=ctx).features[:-1]
        model2[-1][0].body[0]._kwargs['stride'] = (1, 1)
        model2[-1][0].downsample[0]._kwargs['stride'] = (1, 1)
        model3 = ResNet.__factory[depth](pretrained=pretrained,
                                         ctx=ctx).features[:-1]
        model3[-1][0].body[0]._kwargs['stride'] = (1, 1)
        model3[-1][0].downsample[0]._kwargs['stride'] = (1, 1)

        # backbone
        self.base = nn.HybridSequential()
        for m in model1[:-2]:
            self.base.add(m)
        self.base.add(model1[-2][0])

        # branch 1
        self.branch1 = nn.HybridSequential()
        for m in model1[-2][1:]:
            self.branch1.add(m)
        for m in model1[-1]:
            self.branch1.add(m)

        # branch 2
        self.branch2 = nn.HybridSequential()
        for m in model2[-2][1:]:
            self.branch2.add(m)
        for m in model2[-1]:
            self.branch2.add(m)

        # branch 3
        self.branch3 = nn.HybridSequential()
        for m in model3[-2][1:]:
            self.branch3.add(m)
        for m in model3[-1]:
            self.branch3.add(m)

        # local features and classifiers
        self.feat = nn.HybridSequential()
        self.classify = nn.HybridSequential()
        for _ in range(5):
            tmp = nn.HybridSequential()
            tmp.add(nn.GlobalMaxPool2D())
            feat = nn.Conv2D(channels=num_features, kernel_size=1,
                             use_bias=False)
            feat.initialize(init=init.MSRAPrelu('in', 0), ctx=ctx)
            tmp.add(feat)
            bn = nn.BatchNorm()
            bn.initialize(init=init.Zero(), ctx=ctx)
            tmp.add(bn)
            tmp.add(nn.Flatten())
            self.feat.add(tmp)
            classifier = nn.Dense(num_classes, use_bias=False)
            classifier.weight.initialize(init=init.Normal(0.001), ctx=ctx)
            self.classify.add(classifier)

        # global features and classifiers
        self.g_feat = nn.HybridSequential()
        self.g_classify = nn.HybridSequential()
        for _ in range(3):
            tmp = nn.HybridSequential()
            tmp.add(nn.GlobalAvgPool2D())
            feat = nn.Conv2D(channels=num_features, kernel_size=1,
                             use_bias=False)
            feat.initialize(init=init.MSRAPrelu('in', 0), ctx=ctx)
            tmp.add(feat)
            bn = nn.BatchNorm(center=False, scale=True)
            bn.initialize(init=init.Zero(), ctx=ctx)
            tmp.add(bn)
            tmp.add(nn.Flatten())
            self.g_feat.add(tmp)
            classifier = nn.Dense(num_classes, use_bias=False)
            classifier.initialize(init=init.Normal(0.001), ctx=ctx)
            self.g_classify.add(classifier)
def __init__(self, units, num_stage, filter_list, ratio_list, num_class,
             num_group, data_type, drop_out, bn_mom=0.9, **kwargs):
    super(resnext, self).__init__(**kwargs)
    num_unit = len(units)
    assert num_unit == num_stage
    self.num_class = num_class
    self.bnfirst = nn.BatchNorm(in_channels=3, epsilon=2e-5, momentum=bn_mom,
                                prefix='batchnormfirst_')
    # fw
    self.conv0 = nn.Conv2D(in_channels=3, channels=filter_list[0],
                           kernel_size=(7, 7), strides=(2, 2), padding=(3, 3),
                           use_bias=False, prefix='conv0_')
    self.bn0 = nn.BatchNorm(in_channels=filter_list[0], epsilon=2e-5,
                            momentum=bn_mom, prefix='batchnorm0_')
    self.bias0 = BiasAdder(channels=filter_list[0], prefix='bias0_')
    self.relu0 = nn.Activation(activation='relu', prefix='relu0_')
    self.relu0min = NReLu(prefix='relu0min_')
    self.pool0 = nn.MaxPool2D(pool_size=(2, 2), strides=(2, 2),
                              padding=(0, 0), prefix='pool0_')
    # td
    self.upsample0 = UpsampleLayer(size=2, scale=1., prefix='up0_')
    self.bntd0 = nn.BatchNorm(in_channels=filter_list[0], epsilon=2e-5,
                              momentum=bn_mom, prefix='td_batchnorm0_')
    self.bntd0min = nn.BatchNorm(in_channels=filter_list[0], epsilon=2e-5,
                                 momentum=bn_mom, prefix='td_batchnorm0min_')
    self.tdconv0 = nn.Conv2DTranspose(channels=3, in_channels=filter_list[0],
                                      kernel_size=(7, 7), strides=(2, 2),
                                      padding=(3, 3), output_padding=1,
                                      use_bias=False,
                                      params=self.conv0.params,
                                      prefix='td_conv0_')
    self.bntdfinal = nn.BatchNorm(in_channels=3, epsilon=2e-5,
                                  momentum=bn_mom,
                                  prefix='td_batchnormfinal_')
    self.bntdfinalmin = nn.BatchNorm(in_channels=3, epsilon=2e-5,
                                     momentum=bn_mom,
                                     prefix='td_batchnormfinalmin_')
    self.residual_stages = nn.HybridSequential(prefix='residual_')
    topdown_list = []
    for i in range(num_stage):
        self.residual_stages.add(
            residual_unit(in_channels=filter_list[i],
                          num_filter=filter_list[i + 1],
                          ratio=ratio_list[2],
                          strides=(1 if i == 0 else 2, 1 if i == 0 else 2),
                          dim_match=False,
                          name='stage%d_unit%d' % (i + 1, 1),
                          num_group=num_group, bn_mom=bn_mom,
                          prefix='stage%d_unit%d_' % (i + 1, 1)))
        topdown_list.append(
            topdown_residual_unit(fwblock=self.residual_stages[-1],
                                  name='stage%d_td_unit%d' % (i + 1, 1),
                                  prefix='stage%d_td_unit%d_' % (i + 1, 1)))
        for j in range(units[i] - 1):
            self.residual_stages.add(
                residual_unit(in_channels=filter_list[i + 1],
                              num_filter=filter_list[i + 1],
                              ratio=ratio_list[2], strides=(1, 1),
                              dim_match=True,
                              name='stage%d_unit%d' % (i + 1, j + 2),
                              num_group=num_group, bn_mom=bn_mom,
                              prefix='stage%d_unit%d_' % (i + 1, j + 2)))
            topdown_list.append(
                topdown_residual_unit(
                    fwblock=self.residual_stages[-1],
                    name='stage%d_td_unit%d' % (i + 1, j + 2),
                    prefix='stage%d_td_unit%d_' % (i + 1, j + 2)))
    with self.name_scope():
        # top-down stages mirror the forward stages in reverse order
        self.topdown_stages = nn.HybridSequential(prefix='td_residual_')
        for block in topdown_list[::-1]:
            self.topdown_stages.add(block)
    # fw classifier
    self.pool1 = nn.GlobalAvgPool2D(prefix='pool1_')
    self.drop1 = nn.Dropout(rate=drop_out, prefix='dp1_')
    self.fc = nn.Conv2D(in_channels=filter_list[-1], channels=num_class,
                        kernel_size=(1, 1), use_bias=True, prefix='dense_')
    self.flatten1 = nn.Flatten(prefix='flatten1_')
    # bw classifier
    self.reshape = Reshape(shape=(num_class, 1, 1), prefix='reshape_')
    self.td_drop1 = nn.Dropout(rate=drop_out, prefix='td_dp1_')
    self.td_fc = nn.Conv2DTranspose(channels=filter_list[-1],
                                    in_channels=num_class,
                                    kernel_size=(1, 1), strides=(1, 1),
                                    use_bias=False, params=self.fc.params,
                                    prefix='td_dense_')
    self.upsample1 = UpsampleLayer(size=4, scale=1. / (4**2), prefix='up1_')
def __init__(self, classes=4, dropout_keep_prob=0.5, **kwargs):
    """Main classifier head. The full Kinetics dataset has 400 classes;
    `classes` defaults to 4 here."""
    super(InceptionI3d, self).__init__(**kwargs)
    self._num_classes = classes
    self.dropout_keep_prob = dropout_keep_prob
    # this is the main classifier
    with self.name_scope():
        self.features = nn.HybridSequential(prefix='')
        # the input shape is `batch_size` x `num_frames` x 224 x 224 x
        # `num_channels` in the TF code, but Gluon is NCDHW;
        # input shape here is (1, 3, 79, 224, 224)
        self.features.add(_make_unit3d(channels=64, kernel_size=(7, 7, 7),
                                       strides=(2, 2, 2)))
        # shape is (1, 64, 37, 109, 109)
        self.features.add(
            nn.MaxPool3D(pool_size=(1, 3, 3), strides=(1, 2, 2),
                         padding=(0, 55, 55)))
        # this should be 'same' padding; hard-coded for now
        # shape is (1, 64, 37, 109, 109)
        self.features.add(_make_unit3d(channels=64, kernel_size=(1, 1, 1)))
        # shape (1, 64, 37, 109, 109)
        self.features.add(_make_unit3d(channels=192, kernel_size=(3, 3, 3)))
        # shape (1, 192, 35, 107, 107)
        self.features.add(
            nn.MaxPool3D(pool_size=(1, 3, 3), strides=(1, 2, 2),
                         padding=(0, 54, 54)))  # 'same' padding
        # shape (1, 192, 35, 107, 107)
        self.features.add(_make_mixed_3b('mixed_3b'))
        self.features.add(_make_mixed_3c('mixed_3c'))
        # (1, 480, 35, 107, 107)
        self.features.add(
            nn.MaxPool3D(pool_size=(3, 3, 3), strides=(2, 2, 2),
                         padding=(18, 54, 54)))  # 'same' padding here
        self.features.add(_make_mixed_4b('mixed_4b'))
        # self.features.add(_make_mixed_4c('mixed_4c'))
        self.features.add(_make_mixed_4d('mixed_4d'))
        self.features.add(_make_mixed_4e('mixed_4e'))
        self.features.add(_make_mixed_4f('mixed_4f'))
        # (1, 384, 35, 107, 107)
        self.features.add(
            nn.MaxPool3D(pool_size=(2, 2, 2), strides=(2, 2, 2),
                         padding=(18, 54, 54)))
        self.features.add(_make_mixed_5b('mixed_5b'))
        self.features.add(_make_mixed_5c('mixed_5c'))
        self.features.add(nn.AvgPool3D(pool_size=(2, 7, 7)))
        self.features.add(nn.Dropout(self.dropout_keep_prob))
        self.features.add(_make_unit3d(channels=self._num_classes,
                                       kernel_size=(1, 1, 1)))
        # logits/main classifier outputs endpoint
        self.output = nn.HybridSequential(prefix='')
        self.output.add(nn.Flatten())
        self.output.add(nn.Dense(self._num_classes))
def __init__(self, channels, init_block_channels, final_block_channels,
             final_block_groups, dilations, dropout_rate=0.2,
             bn_use_global_stats=False, in_channels=3, in_size=(224, 224),
             classes=1000):
    super(ESPNetv2, self).__init__()
    self.in_size = in_size
    self.classes = classes
    x0_channels = in_channels
    with self.name_scope():
        self.features = DualPathSequential(return_two=False,
                                           first_ordinals=0,
                                           last_ordinals=2, prefix="")
        self.features.add(ESPInitBlock(
            in_channels=in_channels,
            out_channels=init_block_channels,
            bn_use_global_stats=bn_use_global_stats))
        in_channels = init_block_channels
        for i, channels_per_stage in enumerate(channels):
            stage = DualPathSequential(prefix="stage{}_".format(i + 1))
            for j, out_channels in enumerate(channels_per_stage):
                if j == 0:
                    unit = DownsampleBlock(
                        in_channels=in_channels,
                        out_channels=out_channels,
                        x0_channels=x0_channels,
                        dilations=dilations[i][j],
                        bn_use_global_stats=bn_use_global_stats)
                else:
                    unit = ESPBlock(
                        in_channels=in_channels,
                        out_channels=out_channels,
                        strides=1,
                        dilations=dilations[i][j],
                        bn_use_global_stats=bn_use_global_stats)
                stage.add(unit)
                in_channels = out_channels
            self.features.add(stage)
        self.features.add(ESPFinalBlock(
            in_channels=in_channels,
            out_channels=final_block_channels,
            final_groups=final_block_groups,
            bn_use_global_stats=bn_use_global_stats))
        in_channels = final_block_channels
        self.features.add(nn.AvgPool2D(pool_size=7, strides=1))
        self.output = nn.HybridSequential(prefix="")
        self.output.add(nn.Flatten())
        self.output.add(nn.Dropout(rate=dropout_rate))
        self.output.add(nn.Dense(units=classes, in_units=in_channels))
def get_net():
    net = nn.Sequential()
    with net.name_scope():
        net.add(nn.Flatten(),
                nn.Dense(120, activation="relu"),
                nn.Dense(84, activation="relu"),
                nn.Dense(10))
    return net
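A minimal, hedged usage sketch for get_net; the dummy input shape (a batch of 28x28 single-channel images) is an assumption for illustration.

# Hedged usage sketch: build, initialize, and run the MLP on a dummy batch.
from mxnet import nd

net = get_net()
net.initialize()
out = net(nd.random.uniform(shape=(4, 1, 28, 28)))  # Flatten handles reshape
print(out.shape)  # expected: (4, 10)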
def __init__(self, layers, stem_width=32, dropblock_prob=0.0, final_drop=0.0,
             input_size=224, in_channels=3, in_size=(224, 224), classes=1000):
    self.in_size = in_size
    self.classes = classes
    # fixed ResNeSt hyper-parameters
    block = Bottleneck
    avg_down = True
    cardinality = 1
    avd = True
    avd_first = False
    use_splat = True
    bottleneck_width = 64
    radix = 2
    split_drop_ratio = 0
    dilated = False
    dilation = 1
    norm_layer = BatchNorm
    norm_kwargs = None
    last_gamma = False
    self.cardinality = cardinality
    self.bottleneck_width = bottleneck_width
    self.inplanes = stem_width * 2
    self.radix = radix
    self.split_drop_ratio = split_drop_ratio
    self.avd_first = avd_first
    super(ResNeSt, self).__init__(prefix='resnest_')
    norm_kwargs = norm_kwargs if norm_kwargs is not None else {}
    self.norm_kwargs = norm_kwargs
    with self.name_scope():
        # deep stem: three 3x3 convolutions instead of a single 7x7
        self.conv1 = nn.HybridSequential(prefix='conv1')
        self.conv1.add(nn.Conv2D(channels=stem_width, kernel_size=3,
                                 strides=2, padding=1, use_bias=False,
                                 in_channels=3))
        self.conv1.add(norm_layer(in_channels=stem_width, **norm_kwargs))
        self.conv1.add(nn.Activation('relu'))
        self.conv1.add(nn.Conv2D(channels=stem_width, kernel_size=3,
                                 strides=1, padding=1, use_bias=False,
                                 in_channels=stem_width))
        self.conv1.add(norm_layer(in_channels=stem_width, **norm_kwargs))
        self.conv1.add(nn.Activation('relu'))
        self.conv1.add(nn.Conv2D(channels=stem_width * 2, kernel_size=3,
                                 strides=1, padding=1, use_bias=False,
                                 in_channels=stem_width))
        input_size = _update_input_size(input_size, 2)
        self.bn1 = norm_layer(in_channels=stem_width * 2, **norm_kwargs)
        self.relu = nn.Activation('relu')
        self.maxpool = nn.MaxPool2D(pool_size=3, strides=2, padding=1)
        input_size = _update_input_size(input_size, 2)
        self.layer1 = self._make_layer(1, block, 64, layers[0],
                                       avg_down=avg_down,
                                       norm_layer=norm_layer,
                                       last_gamma=last_gamma,
                                       use_splat=use_splat, avd=avd)
        self.layer2 = self._make_layer(2, block, 128, layers[1], strides=2,
                                       avg_down=avg_down,
                                       norm_layer=norm_layer,
                                       last_gamma=last_gamma,
                                       use_splat=use_splat, avd=avd)
        input_size = _update_input_size(input_size, 2)
        if dilated or dilation == 4:
            self.layer3 = self._make_layer(3, block, 256, layers[2],
                                           strides=1, dilation=2,
                                           avg_down=avg_down,
                                           norm_layer=norm_layer,
                                           last_gamma=last_gamma,
                                           dropblock_prob=dropblock_prob,
                                           input_size=input_size,
                                           use_splat=use_splat, avd=avd)
            self.layer4 = self._make_layer(4, block, 512, layers[3],
                                           strides=1, dilation=4,
                                           pre_dilation=2,
                                           avg_down=avg_down,
                                           norm_layer=norm_layer,
                                           last_gamma=last_gamma,
                                           dropblock_prob=dropblock_prob,
                                           input_size=input_size,
                                           use_splat=use_splat, avd=avd)
        elif dilation == 3:
            # special case
            self.layer3 = self._make_layer(3, block, 256, layers[2],
                                           strides=1, dilation=2,
                                           avg_down=avg_down,
                                           norm_layer=norm_layer,
                                           last_gamma=last_gamma,
                                           dropblock_prob=dropblock_prob,
                                           input_size=input_size,
                                           use_splat=use_splat, avd=avd)
            self.layer4 = self._make_layer(4, block, 512, layers[3],
                                           strides=2, dilation=2,
                                           pre_dilation=2,
                                           avg_down=avg_down,
                                           norm_layer=norm_layer,
                                           last_gamma=last_gamma,
                                           dropblock_prob=dropblock_prob,
                                           input_size=input_size,
                                           use_splat=use_splat, avd=avd)
        elif dilation == 2:
            self.layer3 = self._make_layer(3, block, 256, layers[2],
                                           strides=2, avg_down=avg_down,
                                           norm_layer=norm_layer,
                                           last_gamma=last_gamma,
                                           dropblock_prob=dropblock_prob,
                                           input_size=input_size,
                                           use_splat=use_splat, avd=avd)
            self.layer4 = self._make_layer(4, block, 512, layers[3],
                                           strides=1, dilation=2,
                                           avg_down=avg_down,
                                           norm_layer=norm_layer,
                                           last_gamma=last_gamma,
                                           dropblock_prob=dropblock_prob,
                                           input_size=input_size,
                                           use_splat=use_splat, avd=avd)
        else:
            self.layer3 = self._make_layer(3, block, 256, layers[2],
                                           strides=2, avg_down=avg_down,
                                           norm_layer=norm_layer,
                                           last_gamma=last_gamma,
                                           dropblock_prob=dropblock_prob,
                                           input_size=input_size,
                                           use_splat=use_splat, avd=avd)
            input_size = _update_input_size(input_size, 2)
            self.layer4 = self._make_layer(4, block, 512, layers[3],
                                           strides=2, avg_down=avg_down,
                                           norm_layer=norm_layer,
                                           last_gamma=last_gamma,
                                           dropblock_prob=dropblock_prob,
                                           input_size=input_size,
                                           use_splat=use_splat, avd=avd)
            input_size = _update_input_size(input_size, 2)
        self.avgpool = nn.GlobalAvgPool2D()
        self.flat = nn.Flatten()
        self.drop = None
        if final_drop > 0.0:
            self.drop = nn.Dropout(final_drop)
        self.fc = nn.Dense(in_units=512 * block.expansion, units=classes)
def __init__(self, in_channels, num_filter, ratio, strides, dim_match, name,
             num_group, bn_mom=0.9, **kwargs):
    super(residual_unit, self).__init__(**kwargs)
    self.dim_match = dim_match
    self.num_filter = num_filter
    # block 1: 1x1 bottleneck conv
    self.conv1 = nn.Conv2D(in_channels=in_channels,
                           channels=int(num_filter * 0.5),
                           kernel_size=(1, 1), strides=(1, 1),
                           padding=(0, 0), use_bias=False,
                           prefix=name + '_conv1_')
    self.bn1 = nn.BatchNorm(in_channels=int(num_filter * 0.5), epsilon=2e-5,
                            momentum=bn_mom, prefix=name + '_batchnorm1_')
    self.bn1min = nn.BatchNorm(in_channels=int(num_filter * 0.5),
                               epsilon=2e-5, momentum=bn_mom,
                               prefix=name + '_batchnorm1min_')
    self.relu1 = nn.Activation(activation='relu', prefix=name + '_relu1_')
    self.relu1min = NReLu(prefix=name + '_relu1min_')
    # block 2: 3x3 grouped conv
    self.conv2 = nn.Conv2D(in_channels=int(num_filter * 0.5),
                           channels=int(num_filter * 0.5), groups=num_group,
                           kernel_size=(3, 3), strides=strides,
                           padding=(1, 1), use_bias=False,
                           prefix=name + '_conv2_')
    self.bn2 = nn.BatchNorm(in_channels=int(num_filter * 0.5), epsilon=2e-5,
                            momentum=bn_mom, prefix=name + '_batchnorm2_')
    self.bn2min = nn.BatchNorm(in_channels=int(num_filter * 0.5),
                               epsilon=2e-5, momentum=bn_mom,
                               prefix=name + '_batchnorm2min_')
    self.relu2 = nn.Activation(activation='relu', prefix=name + '_relu2_')
    self.relu2min = NReLu(prefix=name + '_relu2min_')
    # block 3: 1x1 expansion conv
    self.conv3 = nn.Conv2D(in_channels=int(num_filter * 0.5),
                           channels=num_filter, kernel_size=(1, 1),
                           strides=(1, 1), padding=(0, 0), use_bias=False,
                           prefix=name + '_conv3_')
    self.bn3 = nn.BatchNorm(in_channels=num_filter, epsilon=2e-5,
                            momentum=bn_mom, prefix=name + '_batchnorm3_')
    self.bn3min = nn.BatchNorm(in_channels=num_filter, epsilon=2e-5,
                               momentum=bn_mom,
                               prefix=name + '_batchnorm3min_')
    # squeeze
    self.pool = nn.GlobalAvgPool2D(prefix=name + '_squeeze_')
    self.flatten = nn.Flatten(prefix=name + '_flatten_')
    # excitation 1
    self.fc1 = nn.Dense(units=int(num_filter * ratio), in_units=num_filter,
                        prefix=name + '_excitation1_dense_')
    self.reluex1 = nn.Activation(activation='relu',
                                 prefix=name + '_excitation1_relu_')
    self.reluex1min = NReLu(prefix=name + '_excitation1_relumin_')
    # excitation 2
    self.fc2 = nn.Dense(units=num_filter, in_units=int(num_filter * ratio),
                        prefix=name + '_excitation2_dense_')
    self.reluex2 = nn.Activation(activation='sigmoid',
                                 prefix=name + '_excitation2_sigmoid_')
    self.reluex2min = NSigmoid(prefix=name + '_excitation2_sigmoidmin_')
    # projection shortcut when the dimensions do not match
    if not dim_match:
        self.fc_sc = nn.Conv2D(in_channels=in_channels, channels=num_filter,
                               kernel_size=(1, 1), strides=strides,
                               use_bias=False, prefix=name + '_sc_dense_')
        self.bn_sc = nn.BatchNorm(in_channels=num_filter, epsilon=2e-5,
                                  momentum=bn_mom,
                                  prefix=name + '_sc_batchnorm_')
        self.bn_scmin = nn.BatchNorm(in_channels=num_filter, epsilon=2e-5,
                                     momentum=bn_mom,
                                     prefix=name + '_sc_batchnormmin_')
    self.relu3 = nn.Activation(activation='relu', prefix=name + '_relu3_')
    self.relu3min = NReLu(prefix=name + '_relu3min_')
net = nn.Sequential()
# add name_scope on the outermost Sequential
with net.name_scope():
    net.add(
        mlpconv(96, 11, 0, strides=4),
        mlpconv(256, 5, 2),
        mlpconv(384, 3, 1),
        nn.Dropout(.5),
        # 10 target classes (10 channels)
        mlpconv(10, 3, 1, max_pooling=False),
        # The input here is batch_size x 10 x 5 x 5; AvgPool2D turns it into
        # batch_size x 10 x 1 x 1. We could use nn.AvgPool2D(pool_size=5),
        # but global pooling is more convenient because it avoids having to
        # work out pool_size.
        nn.GlobalAvgPool2D(),
        # reshape to batch_size x 10
        nn.Flatten())

##########################################################################
# train
train_data, test_data = utils.load_data_fashion_mnist(batch_size=64,
                                                      resize=224)
ctx = utils.try_gpu()
net.initialize(ctx=ctx, init=init.Xavier())
loss = gluon.loss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.1})
utils.train(train_data, test_data, net, loss, trainer, ctx, num_epochs=1)
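To illustrate the comment above about pool_size, a small hedged sketch: both pooling layers give the same result on a 5x5 feature map, but the global version does not need the spatial size in advance.

# Hedged sketch: fixed vs. global average pooling on a 5x5 feature map.
from mxnet import nd
from mxnet.gluon import nn

x = nd.random.uniform(shape=(64, 10, 5, 5))  # batch_size x 10 x 5 x 5
fixed = nn.AvgPool2D(pool_size=5)
glob = nn.GlobalAvgPool2D()
print(fixed(x).shape, glob(x).shape)         # both (64, 10, 1, 1)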
def __init__(self, light=False, stage_channels=(16, 32, 64, 128),
             classes=1000, norm_layer=nn.BatchNorm, norm_kwargs=None,
             activation='prelu', drop=0., **kwargs):
    super(EPRNetCls, self).__init__()
    width1, width2, width3, width4 = tuple(stage_channels)
    self.stage_channels = stage_channels
    with self.name_scope():
        self.conv = nn.Conv2D(channels=width1, kernel_size=3, strides=2,
                              padding=1, use_bias=False)
        self.bn = norm_layer(**({} if norm_kwargs is None else norm_kwargs))
        self.act = Activation(activation)
        self.layer1 = nn.HybridSequential()
        self.layer1.add(
            _EPRModule(channels=width2, in_channels=width1,
                       atrous_rates=(1, 2, 4), norm_layer=norm_layer,
                       norm_kwargs=norm_kwargs, activation=activation,
                       down_sample=False, light=light),
            _EPRModule(channels=width2, in_channels=width2,
                       atrous_rates=(1, 2, 4), norm_layer=norm_layer,
                       norm_kwargs=norm_kwargs, activation=activation,
                       down_sample=True, light=light))
        self.layer2 = nn.HybridSequential()
        self.layer2.add(
            _EPRModule(channels=width3, in_channels=width2,
                       atrous_rates=(3, 6, 9), norm_layer=norm_layer,
                       norm_kwargs=norm_kwargs, activation=activation,
                       down_sample=False, light=light),
            _EPRModule(channels=width3, in_channels=width3,
                       atrous_rates=(3, 6, 9), norm_layer=norm_layer,
                       norm_kwargs=norm_kwargs, activation=activation,
                       down_sample=False, light=light),
            _EPRModule(channels=width3, in_channels=width3,
                       atrous_rates=(3, 6, 9), norm_layer=norm_layer,
                       norm_kwargs=norm_kwargs, activation=activation,
                       down_sample=False, light=light),
            _EPRModule(channels=width3, in_channels=width3,
                       atrous_rates=(3, 6, 9), norm_layer=norm_layer,
                       norm_kwargs=norm_kwargs, activation=activation,
                       down_sample=True, light=light))
        self.layer3 = nn.HybridSequential()
        self.layer3.add(
            _EPRModule(channels=width4, in_channels=width3,
                       atrous_rates=(7, 13, 19), norm_layer=norm_layer,
                       norm_kwargs=norm_kwargs, activation=activation,
                       down_sample=False, light=light),
            _EPRModule(channels=width4, in_channels=width4,
                       atrous_rates=(13, 25, 37), norm_layer=norm_layer,
                       norm_kwargs=norm_kwargs, activation=activation,
                       down_sample=False, light=light))
        self.avg_pool = nn.GlobalAvgPool2D()
        self.flat = nn.Flatten()
        self.drop = nn.Dropout(drop) if drop > 0. else None
        self.linear = nn.Dense(units=classes)
def __init__(self, channels, init_block_channels, final_block_channels,
             kernel_sizes, strides_per_stage, expansion_factors,
             dropout_rate=0.2, tf_mode=False, bn_epsilon=1e-5,
             bn_use_global_stats=False, in_channels=3, in_size=(224, 224),
             classes=1000, **kwargs):
    super(EfficientNet, self).__init__(**kwargs)
    self.in_size = in_size
    self.classes = classes
    activation = "swish"
    with self.name_scope():
        self.features = nn.HybridSequential(prefix="")
        self.features.add(
            EffiInitBlock(in_channels=in_channels,
                          out_channels=init_block_channels,
                          bn_epsilon=bn_epsilon,
                          bn_use_global_stats=bn_use_global_stats,
                          activation=activation, tf_mode=tf_mode))
        in_channels = init_block_channels
        for i, channels_per_stage in enumerate(channels):
            kernel_sizes_per_stage = kernel_sizes[i]
            expansion_factors_per_stage = expansion_factors[i]
            stage = nn.HybridSequential(prefix="stage{}_".format(i + 1))
            with stage.name_scope():
                for j, out_channels in enumerate(channels_per_stage):
                    kernel_size = kernel_sizes_per_stage[j]
                    expansion_factor = expansion_factors_per_stage[j]
                    # only the first unit of a stage may downsample
                    strides = strides_per_stage[i] if (j == 0) else 1
                    if i == 0:
                        stage.add(
                            EffiDwsConvUnit(
                                in_channels=in_channels,
                                out_channels=out_channels,
                                strides=strides,
                                bn_epsilon=bn_epsilon,
                                bn_use_global_stats=bn_use_global_stats,
                                activation=activation, tf_mode=tf_mode))
                    else:
                        stage.add(
                            EffiInvResUnit(
                                in_channels=in_channels,
                                out_channels=out_channels,
                                kernel_size=kernel_size,
                                strides=strides,
                                expansion_factor=expansion_factor,
                                bn_epsilon=bn_epsilon,
                                bn_use_global_stats=bn_use_global_stats,
                                activation=activation, tf_mode=tf_mode))
                    in_channels = out_channels
            self.features.add(stage)
        self.features.add(
            conv1x1_block(in_channels=in_channels,
                          out_channels=final_block_channels,
                          bn_epsilon=bn_epsilon,
                          bn_use_global_stats=bn_use_global_stats,
                          activation=activation))
        in_channels = final_block_channels
        self.features.add(nn.GlobalAvgPool2D())
        self.output = nn.HybridSequential(prefix="")
        self.output.add(nn.Flatten())
        if dropout_rate > 0.0:
            self.output.add(nn.Dropout(rate=dropout_rate))
        self.output.add(nn.Dense(units=classes, in_units=in_channels))
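The stage loop applies the per-stage stride only to the first unit of each stage. A small hedged sketch with made-up placeholder lists (not a real EfficientNet configuration) illustrates the indexing:

# Hedged sketch: per-unit strides as computed in the stage loop above.
channels = [[16], [24, 24], [40, 40]]   # placeholder stage channels
strides_per_stage = [1, 2, 2]           # placeholder strides
for i, channels_per_stage in enumerate(channels):
    for j, out_channels in enumerate(channels_per_stage):
        strides = strides_per_stage[i] if (j == 0) else 1
        print("stage", i + 1, "unit", j + 1, "stride", strides)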
def __init__(self, block, layers, classes=1000, dilated=False,
             norm_layer=BatchNorm, norm_kwargs=None, last_gamma=False,
             deep_stem=False, stem_width=32, avg_down=False, final_drop=0.0,
             use_global_stats=False, name_prefix='', **kwargs):
    self.inplanes = stem_width * 2 if deep_stem else 64
    super(ResNetV1b, self).__init__(prefix=name_prefix)
    norm_kwargs = norm_kwargs if norm_kwargs is not None else {}
    if use_global_stats:
        norm_kwargs['use_global_stats'] = True
    self.norm_kwargs = norm_kwargs
    with self.name_scope():
        if not deep_stem:
            self.conv1 = nn.Conv2D(channels=64, kernel_size=7, strides=2,
                                   padding=3, use_bias=False)
        else:
            # deep stem: three 3x3 convolutions instead of a single 7x7
            self.conv1 = nn.HybridSequential(prefix='conv1')
            self.conv1.add(nn.Conv2D(channels=stem_width, kernel_size=3,
                                     strides=2, padding=1, use_bias=False))
            self.conv1.add(norm_layer(in_channels=stem_width, **norm_kwargs))
            self.conv1.add(nn.Activation('relu'))
            self.conv1.add(nn.Conv2D(channels=stem_width, kernel_size=3,
                                     strides=1, padding=1, use_bias=False))
            self.conv1.add(norm_layer(in_channels=stem_width, **norm_kwargs))
            self.conv1.add(nn.Activation('relu'))
            self.conv1.add(nn.Conv2D(channels=stem_width * 2, kernel_size=3,
                                     strides=1, padding=1, use_bias=False))
        self.bn1 = norm_layer(
            in_channels=64 if not deep_stem else stem_width * 2,
            **norm_kwargs)
        self.relu = nn.Activation('relu')
        self.maxpool = nn.MaxPool2D(pool_size=3, strides=2, padding=1)
        self.layer1 = self._make_layer(1, block, 64, layers[0],
                                       avg_down=avg_down,
                                       norm_layer=norm_layer,
                                       last_gamma=last_gamma)
        self.layer2 = self._make_layer(2, block, 128, layers[1], strides=2,
                                       avg_down=avg_down,
                                       norm_layer=norm_layer,
                                       last_gamma=last_gamma)
        if dilated:
            self.layer3 = self._make_layer(3, block, 256, layers[2],
                                           strides=1, dilation=2,
                                           avg_down=avg_down,
                                           norm_layer=norm_layer,
                                           last_gamma=last_gamma)
            self.layer4 = self._make_layer(4, block, 512, layers[3],
                                           strides=1, dilation=4,
                                           avg_down=avg_down,
                                           norm_layer=norm_layer,
                                           last_gamma=last_gamma)
        else:
            self.layer3 = self._make_layer(3, block, 256, layers[2],
                                           strides=2, avg_down=avg_down,
                                           norm_layer=norm_layer,
                                           last_gamma=last_gamma)
            self.layer4 = self._make_layer(4, block, 512, layers[3],
                                           strides=2, avg_down=avg_down,
                                           norm_layer=norm_layer,
                                           last_gamma=last_gamma)
        self.avgpool = nn.GlobalAvgPool2D()
        self.flat = nn.Flatten()
        self.drop = None
        if final_drop > 0.0:
            self.drop = nn.Dropout(final_drop)
        self.fc = nn.Dense(in_units=512 * block.expansion, units=classes)
def __init__(self, channels, init_block_channels, final_block_channels,
             residuals, shortcuts, kernel_sizes, expansions, bn_epsilon=1e-3,
             bn_use_global_stats=False, in_channels=3, in_size=(224, 224),
             classes=1000):
    super(ProxylessNAS, self).__init__()
    self.in_size = in_size
    self.classes = classes
    with self.name_scope():
        self.features = nn.HybridSequential(prefix='')
        self.features.add(
            conv3x3_block(in_channels=in_channels,
                          out_channels=init_block_channels, strides=2,
                          bn_epsilon=bn_epsilon,
                          bn_use_global_stats=bn_use_global_stats,
                          activation="relu6"))
        in_channels = init_block_channels
        for i, channels_per_stage in enumerate(channels):
            stage = nn.HybridSequential(prefix="stage{}_".format(i + 1))
            residuals_per_stage = residuals[i]
            shortcuts_per_stage = shortcuts[i]
            kernel_sizes_per_stage = kernel_sizes[i]
            expansions_per_stage = expansions[i]
            with stage.name_scope():
                for j, out_channels in enumerate(channels_per_stage):
                    residual = (residuals_per_stage[j] == 1)
                    shortcut = (shortcuts_per_stage[j] == 1)
                    kernel_size = kernel_sizes_per_stage[j]
                    expansion = expansions_per_stage[j]
                    strides = 2 if (j == 0) and (i != 0) else 1
                    stage.add(
                        ProxylessUnit(
                            in_channels=in_channels,
                            out_channels=out_channels,
                            kernel_size=kernel_size,
                            strides=strides,
                            bn_epsilon=bn_epsilon,
                            bn_use_global_stats=bn_use_global_stats,
                            expansion=expansion,
                            residual=residual,
                            shortcut=shortcut))
                    in_channels = out_channels
            self.features.add(stage)
        self.features.add(
            conv1x1_block(in_channels=in_channels,
                          out_channels=final_block_channels,
                          bn_epsilon=bn_epsilon,
                          bn_use_global_stats=bn_use_global_stats,
                          activation="relu6"))
        in_channels = final_block_channels
        self.features.add(nn.AvgPool2D(pool_size=7, strides=1))
        self.output = nn.HybridSequential(prefix='')
        self.output.add(nn.Flatten())
        self.output.add(nn.Dense(units=classes, in_units=in_channels))
    h4_linear = nd.dot(h3, lenet_W4) + lenet_b4
    if verbose:
        print('1st conv block:', h1.shape)
        print('2nd conv block:', h2.shape)
        print('1st dense:', h3.shape)
        print('2nd dense:', h4_linear.shape)
        print('output:', h4_linear)
    return h4_linear

lenet = nn.Sequential()
with lenet.name_scope():
    lenet.add(nn.Conv2D(channels=20, kernel_size=5, activation='relu'),
              nn.MaxPool2D(pool_size=2, strides=2),
              nn.Conv2D(channels=50, kernel_size=3, activation='relu'),
              nn.MaxPool2D(pool_size=2, strides=2),
              nn.Flatten(),
              nn.Dense(128, activation="relu"),
              nn.Dense(10))
lenet.initialize(ctx=ctx)

arch_A = ((1, 64), (1, 128), (2, 256), (2, 512), (2, 512))
arch_B = ((2, 64), (2, 128), (2, 256), (2, 512), (2, 512))
arch_D = ((2, 64), (2, 128), (3, 256), (3, 512), (3, 512))
arch_E = ((2, 64), (2, 128), (4, 256), (4, 512), (4, 512))

def vgg_stack(arch):
    out = nn.Sequential()
    for (num_convs, channels) in arch:
        seq = nn.Sequential()
        for _ in range(num_convs):
            # standard VGG block: stacked 3x3 convs with padding 1
            seq.add(nn.Conv2D(channels=channels, kernel_size=3,
                              padding=1, activation='relu'))
        # each block closes with a 2x2 max pool in the usual VGG recipe
        seq.add(nn.MaxPool2D(pool_size=2, strides=2))
        out.add(seq)
    return out
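A hedged usage sketch for vgg_stack above; the dummy 224x224 input and the expected output shape follow from the five stride-2 pools, assuming the block completion shown.

# Hedged usage sketch: build the VGG-11 conv stack and check the output shape.
from mxnet import nd

features = vgg_stack(arch_A)
features.initialize()
y = features(nd.random.uniform(shape=(1, 3, 224, 224)))
print(y.shape)  # five 2x2 stride-2 pools: (1, 512, 7, 7)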
def __init__(self, direct_channels, skip_channels, init_block_channels,
             bn_use_global_stats=False, in_channels=3, in_size=(224, 224),
             classes=1000, **kwargs):
    super(FishNet, self).__init__(**kwargs)
    self.in_size = in_size
    self.classes = classes
    depth = len(direct_channels[0])
    down1_channels = direct_channels[0]
    up_channels = direct_channels[1]
    down2_channels = direct_channels[2]
    skip1_channels = skip_channels[0]
    skip2_channels = skip_channels[1]
    with self.name_scope():
        self.features = nn.HybridSequential(prefix='')
        self.features.add(
            SEInitBlock(in_channels=in_channels,
                        out_channels=init_block_channels,
                        bn_use_global_stats=bn_use_global_stats))
        in_channels = init_block_channels
        # tail: first downsampling path with skip connections
        down1_seq = nn.HybridSequential(prefix='')
        skip1_seq = nn.HybridSequential(prefix='')
        for i in range(depth + 1):
            skip1_channels_list = skip1_channels[i]
            if i < depth:
                skip1_seq.add(
                    SkipUnit(in_channels=in_channels,
                             out_channels_list=skip1_channels_list,
                             bn_use_global_stats=bn_use_global_stats))
                down1_channels_list = down1_channels[i]
                down1_seq.add(
                    DownUnit(in_channels=in_channels,
                             out_channels_list=down1_channels_list,
                             bn_use_global_stats=bn_use_global_stats))
                in_channels = down1_channels_list[-1]
            else:
                skip1_seq.add(
                    SkipAttUnit(in_channels=in_channels,
                                out_channels_list=skip1_channels_list,
                                bn_use_global_stats=bn_use_global_stats))
                in_channels = skip1_channels_list[-1]
        # body: upsampling path
        up_seq = nn.HybridSequential(prefix='')
        skip2_seq = nn.HybridSequential(prefix='')
        for i in range(depth + 1):
            skip2_channels_list = skip2_channels[i]
            if i > 0:
                in_channels += skip1_channels[depth - i][-1]
            if i < depth:
                skip2_seq.add(
                    SkipUnit(in_channels=in_channels,
                             out_channels_list=skip2_channels_list,
                             bn_use_global_stats=bn_use_global_stats))
                up_channels_list = up_channels[i]
                dilation = 2**i
                up_seq.add(
                    UpUnit(in_channels=in_channels,
                           out_channels_list=up_channels_list,
                           dilation=dilation,
                           bn_use_global_stats=bn_use_global_stats))
                in_channels = up_channels_list[-1]
            else:
                skip2_seq.add(Identity())
        # head: second downsampling path
        down2_seq = nn.HybridSequential(prefix='')
        for i in range(depth):
            down2_channels_list = down2_channels[i]
            down2_seq.add(
                DownUnit(in_channels=in_channels,
                         out_channels_list=down2_channels_list,
                         bn_use_global_stats=bn_use_global_stats))
            in_channels = down2_channels_list[-1] + \
                skip2_channels[depth - 1 - i][-1]
        self.features.add(
            SesquialteralHourglass(down1_seq=down1_seq, skip1_seq=skip1_seq,
                                   up_seq=up_seq, skip2_seq=skip2_seq,
                                   down2_seq=down2_seq))
        self.features.add(
            FishFinalBlock(in_channels=in_channels,
                           bn_use_global_stats=bn_use_global_stats))
        in_channels = in_channels // 2
        self.features.add(nn.AvgPool2D(pool_size=7, strides=1))
        self.output = nn.HybridSequential(prefix='')
        self.output.add(
            conv1x1(in_channels=in_channels, out_channels=classes,
                    use_bias=True))
        self.output.add(nn.Flatten())
    batch_size=batch_size, shuffle=False)

# model
net = nn.Sequential()
# net.add(
#     nn.Dense(500, activation='relu'),
#     nn.Dense(256, activation='relu'),
#     nn.Dropout(dropout_rate),
#     nn.Dense(out_put_num, activation='sigmoid')
# )
net.add(nn.Conv1D(8, kernel_size=5, activation='relu'),
        nn.Conv1D(16, kernel_size=5, activation='relu'),
        nn.BatchNorm(momentum=0.8),
        nn.MaxPool1D(pool_size=2),
        nn.Conv1D(16, kernel_size=1, activation='relu'),
        nn.Conv1D(16, kernel_size=5, activation='relu'),
        nn.Flatten(),
        nn.Dense(256, activation='relu'),
        nn.Dropout(0.25),
        nn.Dense(out_put_num, activation='relu'))
net.initialize(mx.init.Xavier(magnitude=2.24))
# net.initialize(mx.init.MSRAPrelu())
# net.initialize(mx.init.Normal(0.5), ctx=ctx)
# net.load_parameters(para_filepath)
net.collect_params().reset_ctx(ctx)

# solve
loss = gloss.SoftmaxCrossEntropyLoss()
metric = mx.metric.Accuracy()

def test():
    metric = mx.metric.Accuracy()
def __init__(self, block, layers, cardinality=1, bottleneck_width=64, classes=1000,
             dilated=False, dilation=1, norm_layer=nn.BatchNorm, norm_kwargs=None,
             last_gamma=False, deep_stem=False, stem_width=32, avg_down=False,
             final_drop=0.0, use_global_stats=False, name_prefix='',
             dropblock_prob=0, input_size=224, use_splat=False, radix=2,
             avd=False, avd_first=False, split_drop_ratio=0):
    self.cardinality = cardinality
    self.bottleneck_width = bottleneck_width
    self.inplanes = stem_width * 2 if deep_stem else 64
    self.radix = radix
    self.split_drop_ratio = split_drop_ratio
    self.avd_first = avd_first
    super(ResNet, self).__init__(prefix=name_prefix)
    norm_kwargs = norm_kwargs if norm_kwargs is not None else {}
    if use_global_stats:
        norm_kwargs['use_global_stats'] = True
    self.norm_kwargs = norm_kwargs
    with self.name_scope():
        if not deep_stem:
            # use a 3x3 stride-1 conv instead of the usual 7x7 stem
            self.conv1 = nn.Conv2D(channels=64, kernel_size=3, strides=1,
                                   padding=1, use_bias=False, in_channels=3)
        else:
            self.conv1 = nn.HybridSequential(prefix='conv1')
            self.conv1.add(nn.Conv2D(channels=stem_width, kernel_size=3, strides=1,
                                     padding=1, use_bias=False, in_channels=3))
            self.conv1.add(norm_layer(in_channels=stem_width, **norm_kwargs))
            self.conv1.add(gluon_act(config.net_act))
            self.conv1.add(nn.Conv2D(channels=stem_width, kernel_size=3, strides=1,
                                     padding=1, use_bias=False, in_channels=stem_width))
            self.conv1.add(norm_layer(in_channels=stem_width, **norm_kwargs))
            self.conv1.add(gluon_act(config.net_act))
            self.conv1.add(nn.Conv2D(channels=stem_width * 2, kernel_size=3, strides=1,
                                     padding=1, use_bias=False, in_channels=stem_width))
        self.bn1 = norm_layer(in_channels=64 if not deep_stem else stem_width * 2,
                              **norm_kwargs)
        self.relu = gluon_act(config.net_act)
        # stage 1
        self.layer1 = self._make_layer(1, block, 64, layers[0], strides=2,
                                       avg_down=avg_down, norm_layer=norm_layer,
                                       last_gamma=last_gamma, use_splat=use_splat, avd=avd)
        input_size = _update_input_size(input_size, 2)
        # stage 2
        self.layer2 = self._make_layer(2, block, 128, layers[1], strides=2,
                                       avg_down=avg_down, norm_layer=norm_layer,
                                       last_gamma=last_gamma, use_splat=use_splat, avd=avd)
        input_size = _update_input_size(input_size, 2)
        # stage 3 ~ stage 4
        if dilated or dilation == 4:
            self.layer3 = self._make_layer(3, block, 256, layers[2], strides=1, dilation=2,
                                           avg_down=avg_down, norm_layer=norm_layer,
                                           last_gamma=last_gamma, dropblock_prob=dropblock_prob,
                                           input_size=input_size, use_splat=use_splat, avd=avd)
            self.layer4 = self._make_layer(4, block, 512, layers[3], strides=1, dilation=4,
                                           pre_dilation=2, avg_down=avg_down,
                                           norm_layer=norm_layer, last_gamma=last_gamma,
                                           dropblock_prob=dropblock_prob, input_size=input_size,
                                           use_splat=use_splat, avd=avd)
        elif dilation == 3:
            # special case
            self.layer3 = self._make_layer(3, block, 256, layers[2], strides=1, dilation=2,
                                           avg_down=avg_down, norm_layer=norm_layer,
                                           last_gamma=last_gamma, dropblock_prob=dropblock_prob,
                                           input_size=input_size, use_splat=use_splat, avd=avd)
            self.layer4 = self._make_layer(4, block, 512, layers[3], strides=2, dilation=2,
                                           pre_dilation=2, avg_down=avg_down,
                                           norm_layer=norm_layer, last_gamma=last_gamma,
                                           dropblock_prob=dropblock_prob, input_size=input_size,
                                           use_splat=use_splat, avd=avd)
        elif dilation == 2:
            self.layer3 = self._make_layer(3, block, 256, layers[2], strides=2,
                                           avg_down=avg_down, norm_layer=norm_layer,
                                           last_gamma=last_gamma, dropblock_prob=dropblock_prob,
                                           input_size=input_size, use_splat=use_splat, avd=avd)
            self.layer4 = self._make_layer(4, block, 512, layers[3], strides=1, dilation=2,
                                           avg_down=avg_down, norm_layer=norm_layer,
                                           last_gamma=last_gamma, dropblock_prob=dropblock_prob,
                                           input_size=input_size, use_splat=use_splat, avd=avd)
        else:
            self.layer3 = self._make_layer(3, block, 256, layers[2], strides=2,
                                           avg_down=avg_down, norm_layer=norm_layer,
                                           last_gamma=last_gamma, dropblock_prob=dropblock_prob,
                                           input_size=input_size, use_splat=use_splat, avd=avd)
            input_size = _update_input_size(input_size, 2)
            self.layer4 = self._make_layer(4, block, 512, layers[3], strides=2,
                                           avg_down=avg_down, norm_layer=norm_layer,
                                           last_gamma=last_gamma, dropblock_prob=dropblock_prob,
                                           input_size=input_size, use_splat=use_splat, avd=avd)
            input_size = _update_input_size(input_size, 2)
        self.flat = nn.Flatten()
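# `_update_input_size` is referenced above but not defined in this excerpt.
# A minimal sketch of such a helper, assuming it just tracks the feature-map
# size after a stride-s downsampling (hypothetical reconstruction):
import math

def _update_input_size(input_size, stride):
    sh, sw = (stride, stride) if isinstance(stride, int) else stride
    ih, iw = (input_size, input_size) if isinstance(input_size, int) else input_size
    # 'same'-padded conv/pool divides the spatial size by the stride, rounding up
    return (int(math.ceil(ih / sh)), int(math.ceil(iw / sw)))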
def train_mnist():
    # Select a fixed random seed for reproducibility
    mx.random.seed(42)
    if version == '':
        net = nn.HybridSequential(prefix='DApp_')
        with net.name_scope():
            net.add(
                nn.Conv2D(channels=16, kernel_size=(3, 3), activation='relu'),
                nn.MaxPool2D(pool_size=(2, 2), strides=(1, 1)),
                nn.Conv2D(channels=32, kernel_size=(3, 3), activation='relu'),
                nn.MaxPool2D(pool_size=(2, 2), strides=(1, 1)),
                nn.Conv2D(channels=64, kernel_size=(3, 3), activation='relu'),
                nn.MaxPool2D(pool_size=(2, 2), strides=(2, 2)),
                nn.Conv2D(channels=128, kernel_size=(1, 1), activation='relu'),
                nn.MaxPool2D(pool_size=(2, 2), strides=(2, 2)),
                nn.Flatten(),
                nn.Dense(10, activation=None),
            )
    elif version == 'lenet':
        net = nn.HybridSequential(prefix='LeNet_')
        with net.name_scope():
            net.add(
                nn.Conv2D(channels=20, kernel_size=(5, 5), activation='relu'),
                nn.MaxPool2D(pool_size=(2, 2), strides=(2, 2)),
                nn.Conv2D(channels=50, kernel_size=(5, 5), activation='relu'),
                nn.MaxPool2D(pool_size=(2, 2), strides=(2, 2)),
                nn.Flatten(),
                nn.Dense(500, activation='relu'),
                nn.Dense(10, activation=None),
            )
    elif version == 'mlp':
        net = nn.HybridSequential(prefix='MLP_')
        with net.name_scope():
            net.add(
                nn.Flatten(),
                nn.Dense(128, activation='relu'),
                nn.Dense(64, activation='relu'),
                nn.Dense(10, activation=None)  # loss function includes softmax already, see below
            )
    else:
        raise ValueError('Unknown version: {}'.format(version))
    net.initialize(mx.init.Xavier(), ctx=ctx)
    net.summary(nd.zeros((1, 1, 28, 28), ctx=ctx))
    trainer = gluon.Trainer(
        params=net.collect_params(),
        optimizer='adam',
        optimizer_params={'learning_rate': 1e-3},
    )
    metric = mx.metric.Accuracy()
    loss_function = gluon.loss.SoftmaxCrossEntropyLoss()
    num_epochs = 10
    for epoch in range(num_epochs):
        for inputs, labels in train_loader:
            inputs = inputs.as_in_context(ctx)
            labels = labels.as_in_context(ctx)
            with autograd.record():
                outputs = net(inputs)
                loss = loss_function(outputs, labels)
            loss.backward()
            metric.update(labels, outputs)
            trainer.step(batch_size=inputs.shape[0])
        name, acc = metric.get()
        print('After epoch {}: {} = {:5.2%}'.format(epoch + 1, name, acc))
        metric.reset()
    for inputs, labels in val_loader:
        inputs = inputs.as_in_context(ctx)
        labels = labels.as_in_context(ctx)
        metric.update(labels, net(inputs))
    print('Validation: {} = {}'.format(*metric.get()))
    assert metric.get()[1] > 0.96
    sym = net(mx.sym.var('data'))
    sym_file, param_file = load_fname(version)
    with open(sym_file, "w") as f:
        f.write(sym.tojson())
    net.collect_params().save(param_file)
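# The symbol/params pair saved by train_mnist() can be reassembled without the
# original Python class. A minimal sketch using SymbolBlock; parameter names in
# `param_file` match the symbol's because both come from the same net:
sym_file, param_file = load_fname(version)
loaded_sym = mx.sym.load(sym_file)
deserialized = gluon.nn.SymbolBlock(outputs=loaded_sym, inputs=mx.sym.var('data'))
deserialized.collect_params().load(param_file, ctx=ctx)
probe = deserialized(nd.zeros((1, 1, 28, 28), ctx=ctx))  # same outputs as `net`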
def get_block(block_mode='just-conv', act_mode='relu', use_se=False):
    if block_mode == 'just-conv':
        net = gluon.nn.HybridSequential()
        net.add(
            nn.Conv2D(16, kernel_size=3, strides=2, padding=1, use_bias=False, prefix='1st_conv_'),
            nn.BatchNorm(momentum=0.1),
            Activation(act_mode)
        )
        if use_se:
            net.add(SE(16))
        net.add(
            nn.Conv2D(32, in_channels=16, kernel_size=3, strides=2, padding=1,
                      use_bias=False, prefix='2nd_conv_'),
            nn.BatchNorm(momentum=0.1),
            Activation(act_mode)
        )
    elif block_mode == 'SNB':
        net = gluon.nn.HybridSequential()
        net.add(
            nn.Conv2D(16, kernel_size=3, strides=2, padding=1, use_bias=False, prefix='1st_conv_'),
            nn.BatchNorm(momentum=0.1),
            Activation(act_mode)
        )
        if use_se:
            net.add(SE(16))
        net.add(
            ShuffleNetBlock(16, 32, 16, bn=nn.BatchNorm, block_mode='ShuffleNetV2',
                            ksize=3, stride=1, use_se=use_se, act_name=act_mode)
        )
    elif block_mode == 'SNB-x':
        net = gluon.nn.HybridSequential()
        net.add(
            nn.Conv2D(16, kernel_size=3, strides=2, padding=1, use_bias=False, prefix='1st_conv_'),
            nn.BatchNorm(momentum=0.1),
            Activation(act_mode)
        )
        if use_se:
            net.add(SE(16))
        net.add(
            ShuffleNetBlock(16, 32, 16, bn=nn.BatchNorm, block_mode='ShuffleXception',
                            ksize=3, stride=1, use_se=use_se, act_name=act_mode)
        )
    elif block_mode == 'ShuffleNas_fixArch':
        architecture = [0, 0, 0, 0, 0, 0, 1, 1, 2, 0, 1, 1, 0, 0, 1, 2, 2, 0, 2, 0]
        scale_ids = [8, 6, 5, 7, 6, 7, 3, 4, 2, 4, 2, 3, 4, 3, 6, 7, 5, 3, 4, 6]
        net = get_shufflenas_oneshot(architecture=architecture, scale_ids=scale_ids,
                                     use_se=True, last_conv_after_pooling=True)
    else:
        raise ValueError("Unrecognized mode: {}".format(block_mode))

    if block_mode != 'ShuffleNas_fixArch':
        net.add(nn.GlobalAvgPool2D(),
                nn.Conv2D(10, in_channels=32, kernel_size=1, strides=1, padding=0, use_bias=True),
                nn.Flatten()
                )
    else:
        net.output = nn.HybridSequential(prefix='output_')
        with net.output.name_scope():
            net.output.add(
                nn.Conv2D(10, in_channels=1024, kernel_size=1, strides=1, padding=0, use_bias=True),
                nn.Flatten()
            )
    return net
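# Usage sketch for get_block: build one of the small blocks, initialize it, and
# push a dummy CIFAR-sized batch through it (the input shape is illustrative):
block = get_block(block_mode='SNB', act_mode='relu', use_se=True)
block.initialize(mx.init.Xavier())
logits = block(mx.nd.zeros((2, 3, 32, 32)))
print(logits.shape)  # (2, 10) after the global pool and 1x1 conv head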
def __init__(self, net_name, batch_size, num_class, use_bias=False, use_bn=False,
             do_topdown=False, do_countpath=False, do_pn=False, relu_td=False, do_nn=False):
    super(NRM, self).__init__()
    self.num_class = num_class
    self.do_topdown = do_topdown
    self.do_countpath = do_countpath
    self.do_pn = do_pn
    self.relu_td = relu_td
    self.do_nn = do_nn
    self.use_bn = use_bn
    self.use_bias = use_bias
    self.batch_size = batch_size
    self.features, layers_drm, layers_drm_cp = self._make_layers(
        cfg[net_name], use_bias, use_bn, self.do_topdown, self.do_countpath)
    with self.name_scope():
        self.classifier = nn.HybridSequential(prefix='classifier_')
        conv_layer = nn.Conv2D(in_channels=cfg[net_name][-2], channels=self.num_class,
                               kernel_size=(1, 1), use_bias=True)
        self.classifier.add(conv_layer)
        self.classifier.add(nn.Flatten())
    if self.do_topdown:
        layers_drm += [
            nn.Conv2DTranspose(channels=cfg[net_name][-2], in_channels=self.num_class,
                               kernel_size=(1, 1), strides=(1, 1), use_bias=False,
                               params=conv_layer.params),
            Reshape(shape=(self.num_class, 1, 1))
        ]
        with self.name_scope():
            self.drm = nn.HybridSequential(prefix='drmtd_')
            for block in layers_drm[::-1]:
                self.drm.add(block)
        if self.do_pn:
            with self.name_scope():
                self.insnorms = nn.HybridSequential(prefix='instancenorm_')
                for i in range(len(self.drm._children)):
                    if (self.drm._children[i].name.find('batchnorm') != -1) and \
                            (i < (len(self.drm._children) - 1)):
                        self.insnorms.add(InstanceNorm())
            with self.name_scope():
                self.insnorms_fw = nn.HybridSequential(prefix='instancenormfw_')
                for i in range(len(self.features._children)):
                    if self.features._children[i].name.find('batchnorm') != -1:
                        self.insnorms_fw.add(InstanceNorm())
    if self.do_countpath:
        layers_drm_cp += [
            nn.Conv2DTranspose(channels=cfg[net_name][-2], in_channels=self.num_class,
                               kernel_size=(1, 1), strides=(1, 1), use_bias=False),
            Reshape(shape=(self.num_class, 1, 1))
        ]
        with self.name_scope():
            self.drm_cp = nn.HybridSequential(prefix='drmcp_')
            for block in layers_drm_cp[::-1]:
                self.drm_cp.add(block)
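# The `params=conv_layer.params` argument above ties the top-down
# Conv2DTranspose to the classifier's 1x1 conv. A minimal standalone sketch of
# this Gluon weight-sharing mechanism (shapes chosen purely for illustration):
from mxnet.gluon import nn

conv = nn.Conv2D(channels=10, in_channels=512, kernel_size=(1, 1), use_bias=True)
deconv = nn.Conv2DTranspose(channels=512, in_channels=10, kernel_size=(1, 1),
                            use_bias=False, params=conv.params)
conv.initialize()
# both blocks hold the very same weight Parameter of shape (10, 512, 1, 1),
# so updating one updates the other
assert deconv.weight is conv.weight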
import sys
sys.path.append('..')
import gluonbook as gb
from mxnet import autograd, gluon, init, nd
from mxnet.gluon import loss as gloss, nn

batch_size = 256
train_iter, test_iter = gb.load_data_fashion_mnist(batch_size)

# initialize the model: a single fully connected layer
net = nn.Sequential()
net.add(nn.Flatten())  # flatten each image into a vector
net.add(nn.Dense(10))  # output layer with 10 units
net.initialize(init.Normal(sigma=0.01))

# define the loss function
loss = gloss.SoftmaxCrossEntropyLoss()

# define the optimization algorithm
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.1})

num_epochs = 5
gb.train_cpu(net, train_iter, test_iter, loss, num_epochs, batch_size, None, None, trainer)
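# gb.train_cpu is a gluonbook helper; the two None arguments are (params, lr),
# only used when updating parameters manually instead of with a Trainer.
# A minimal sketch of the loop such a helper runs (an illustration, not
# gluonbook's exact code):
def train_cpu_sketch(net, train_iter, test_iter, loss, num_epochs, batch_size, trainer):
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
        for X, y in train_iter:
            with autograd.record():
                y_hat = net(X)
                l = loss(y_hat, y)
            l.backward()
            trainer.step(batch_size)
            train_l_sum += l.sum().asscalar()
            train_acc_sum += (y_hat.argmax(axis=1) == y.astype('float32')).sum().asscalar()
            n += y.size
        test_acc = gb.evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc))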
def __init__(self, num_classes=1001):
    super(NASNetALarge, self).__init__()
    self.num_classes = num_classes

    self.conv0 = nn.HybridSequential()
    self.conv0.add(nn.Conv2D(channels=96, kernel_size=3, padding=0, strides=1, use_bias=False))
    self.conv0.add(nn.BatchNorm(epsilon=0.001, momentum=0.1))

    self.cell_stem_0 = CellStem0()
    self.cell_stem_1 = CellStem1()

    self.cell_0 = FirstCell(in_channels_left=168, out_channels_left=84,
                            in_channels_right=336, out_channels_right=168)
    self.cell_1 = NormalCell(in_channels_left=336, out_channels_left=168,
                             in_channels_right=1008, out_channels_right=168)
    self.cell_2 = NormalCell(in_channels_left=1008, out_channels_left=168,
                             in_channels_right=1008, out_channels_right=168)
    self.cell_3 = NormalCell(in_channels_left=1008, out_channels_left=168,
                             in_channels_right=1008, out_channels_right=168)
    self.cell_4 = NormalCell(in_channels_left=1008, out_channels_left=168,
                             in_channels_right=1008, out_channels_right=168)
    self.cell_5 = NormalCell(in_channels_left=1008, out_channels_left=168,
                             in_channels_right=1008, out_channels_right=168)

    self.reduction_cell_0 = ReductionCell0(in_channels_left=1008, out_channels_left=336,
                                           in_channels_right=1008, out_channels_right=336)

    self.cell_6 = FirstCell(in_channels_left=1008, out_channels_left=168,
                            in_channels_right=1344, out_channels_right=336)
    self.cell_7 = NormalCell(in_channels_left=1344, out_channels_left=336,
                             in_channels_right=2016, out_channels_right=336)
    self.cell_8 = NormalCell(in_channels_left=2016, out_channels_left=336,
                             in_channels_right=2016, out_channels_right=336)
    self.cell_9 = NormalCell(in_channels_left=2016, out_channels_left=336,
                             in_channels_right=2016, out_channels_right=336)
    self.cell_10 = NormalCell(in_channels_left=2016, out_channels_left=336,
                              in_channels_right=2016, out_channels_right=336)
    self.cell_11 = NormalCell(in_channels_left=2016, out_channels_left=336,
                              in_channels_right=2016, out_channels_right=336)

    self.reduction_cell_1 = ReductionCell1(in_channels_left=2016, out_channels_left=672,
                                           in_channels_right=2016, out_channels_right=672)

    self.cell_12 = FirstCell(in_channels_left=2016, out_channels_left=336,
                             in_channels_right=2688, out_channels_right=672)
    self.cell_13 = NormalCell(in_channels_left=2688, out_channels_left=672,
                              in_channels_right=4032, out_channels_right=672)
    self.cell_14 = NormalCell(in_channels_left=4032, out_channels_left=672,
                              in_channels_right=4032, out_channels_right=672)
    self.cell_15 = NormalCell(in_channels_left=4032, out_channels_left=672,
                              in_channels_right=4032, out_channels_right=672)
    self.cell_16 = NormalCell(in_channels_left=4032, out_channels_left=672,
                              in_channels_right=4032, out_channels_right=672)
    self.cell_17 = NormalCell(in_channels_left=4032, out_channels_left=672,
                              in_channels_right=4032, out_channels_right=672)

    self.relu = nn.Activation(activation='relu')
    self.avgpool = nn.AvgPool2D(pool_size=11, strides=1, padding=0)
    self.flatten = nn.Flatten()
    self.dropout = nn.Dropout(0.5)
    self.dense = nn.Dense(num_classes)
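# A minimal smoke test for the constructor above; the cell classes (CellStem0,
# NormalCell, ...) are assumed to be defined elsewhere in the file. NASNet-A
# Large is commonly run at 331x331; the resolution must leave an 11x11 map for
# the 11x11 average pool, so adjust the input size if the shapes mismatch:
net = NASNetALarge(num_classes=1001)
net.initialize(mx.init.Xavier())
out = net(mx.nd.zeros((1, 3, 331, 331)))
print(out.shape)  # expected: (1, 1001)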
def generate_lookup_table(use_se, last_conv_after_pooling, channels_layout, nas_root):
    stage_repeats = [4, 4, 8, 4]
    if channels_layout == 'OneShot':
        stage_out_channels = [64, 160, 320, 640]
    elif channels_layout == 'ShuffleNetV2+':
        stage_out_channels = [48, 128, 256, 512]
    else:
        raise ValueError('Unrecognized channel layout')
    channel_scales = [0.2, 0.4, 0.6, 0.8, 1.0, 1.2, 1.4, 1.6, 1.8, 2.0]
    first_conv_out_channel = 16
    input_size = 224
    last_conv_out_channel = 1024
    input_data = nd.ones((1, 3, input_size, input_size))

    bar = Bar(max_step=sum(stage_repeats) + 2, name='Building lookup table')
    bar.start()

    lookup_table = dict()
    lookup_table['config'] = dict()
    lookup_table['config']['use_se'] = use_se
    lookup_table['config']['last_conv_after_pooling'] = last_conv_after_pooling
    lookup_table['config']['channels_layout'] = channels_layout
    lookup_table['config']['stage_repeats'] = stage_repeats
    lookup_table['config']['stage_out_channels'] = stage_out_channels
    lookup_table['config']['channel_scales'] = channel_scales
    lookup_table['config']['first_conv_out_channel'] = first_conv_out_channel
    lookup_table['config']['input_size'] = input_size
    lookup_table['config']['last_conv_out_channel'] = last_conv_out_channel

    # input block
    bar.step()
    input_block = nn.HybridSequential()
    input_block.add(
        nn.Conv2D(first_conv_out_channel, in_channels=3, kernel_size=3, strides=2,
                  padding=1, use_bias=False, prefix='first_conv_'),
        nn.BatchNorm(momentum=0.1),
        Activation('hard_swish' if use_se else 'relu')
    )
    input_block_flops, input_block_model_size, input_data = get_block_flop(input_block, input_data)
    lookup_table['flops'] = dict()
    lookup_table['params'] = dict()
    lookup_table['flops']['input_block'] = input_block_flops
    lookup_table['params']['input_block'] = input_block_model_size

    # mid blocks: 20 x 4 x 10, num_of_blocks x num_of_block_choices x num_of_channel_scales
    lookup_table['flops']['nas_block'] = []
    lookup_table['params']['nas_block'] = []
    input_channel = first_conv_out_channel
    for stage_id in range(len(stage_repeats)):
        numrepeat = stage_repeats[stage_id]
        output_channel = stage_out_channels[stage_id]
        if use_se:
            act_name = 'hard_swish' if stage_id >= 1 else 'relu'
            block_use_se = True if stage_id >= 2 else False
        else:
            act_name = 'relu'
            block_use_se = False
        # create repeated blocks for current stage
        for i in range(numrepeat):
            bar.step()
            stride = 2 if i == 0 else 1
            output_data = None
            block_flops = [[0] * len(channel_scales) for _ in range(4)]
            block_params = [[0] * len(channel_scales) for _ in range(4)]
            for scale_i, scale in enumerate(channel_scales):
                # TODO: change back to make_divisible
                # mid_channel = make_divisible(int(output_channel // 2 * channel_scales[block_id]))
                mid_channel = int(output_channel // 2 * scale)
                # SNB 3x3
                snb3 = ShuffleNetBlock(input_channel, output_channel, mid_channel,
                                       block_mode='ShuffleNetV2', ksize=3, stride=stride,
                                       use_se=block_use_se, act_name=act_name)
                snb3_block_flops, snb3_block_model_size, _ = get_block_flop(snb3, input_data)
                # SNB 5x5
                snb5 = ShuffleNetBlock(input_channel, output_channel, mid_channel,
                                       block_mode='ShuffleNetV2', ksize=5, stride=stride,
                                       use_se=block_use_se, act_name=act_name)
                snb5_block_flops, snb5_block_model_size, _ = get_block_flop(snb5, input_data)
                # SNB 7x7
                snb7 = ShuffleNetBlock(input_channel, output_channel, mid_channel,
                                       block_mode='ShuffleNetV2', ksize=7, stride=stride,
                                       use_se=block_use_se, act_name=act_name)
                snb7_block_flops, snb7_block_model_size, _ = get_block_flop(snb7, input_data)
                # SXB 3x3
                sxb3 = ShuffleNetBlock(input_channel, output_channel, mid_channel,
                                       block_mode='ShuffleXception', ksize=3, stride=stride,
                                       use_se=block_use_se, act_name=act_name)
                sxb3_block_flops, sxb3_block_model_size, output_data = get_block_flop(sxb3, input_data)
                # fill the table
                block_flops[0][scale_i] = snb3_block_flops
                block_params[0][scale_i] = snb3_block_model_size
                block_flops[1][scale_i] = snb5_block_flops
                block_params[1][scale_i] = snb5_block_model_size
                block_flops[2][scale_i] = snb7_block_flops
                block_params[2][scale_i] = snb7_block_model_size
                block_flops[3][scale_i] = sxb3_block_flops
                block_params[3][scale_i] = sxb3_block_model_size
            lookup_table['flops']['nas_block'].append(block_flops)
            lookup_table['params']['nas_block'].append(block_params)
            input_data = output_data
            input_channel = output_channel

    # output block
    bar.step()
    output_block = nn.HybridSequential()
    if last_conv_after_pooling:
        # MobileNet V3 approach
        output_block.add(
            nn.GlobalAvgPool2D(),
            # no last SE for MobileNet V3 style
            nn.Conv2D(last_conv_out_channel, kernel_size=1, strides=1, padding=0,
                      use_bias=True, prefix='conv_fc_'),
            # no BN for the conv after pooling
            Activation('hard_swish' if use_se else 'relu')
        )
    else:
        if use_se:
            # ShuffleNetV2+ approach
            output_block.add(
                nn.Conv2D(make_divisible(last_conv_out_channel * 0.75),
                          in_channels=input_channel, kernel_size=1, strides=1,
                          padding=0, use_bias=False, prefix='last_conv_'),
                nn.BatchNorm(momentum=0.1),
                Activation('hard_swish' if use_se else 'relu'),
                nn.GlobalAvgPool2D(),
                SE(make_divisible(last_conv_out_channel * 0.75)),
                nn.Conv2D(last_conv_out_channel,
                          in_channels=make_divisible(last_conv_out_channel * 0.75),
                          kernel_size=1, strides=1, padding=0, use_bias=True,
                          prefix='conv_fc_'),
                # no BN for the conv after pooling
                Activation('hard_swish' if use_se else 'relu')
            )
        else:
            # original Oneshot NAS approach
            output_block.add(
                nn.Conv2D(last_conv_out_channel, in_channels=input_channel, kernel_size=1,
                          strides=1, padding=0, use_bias=False, prefix='last_conv_'),
                nn.BatchNorm(momentum=0.1),
                Activation('hard_swish' if use_se else 'relu'),
                nn.GlobalAvgPool2D()
            )
    # dropout ratio follows ShuffleNetV2+ for SE variants
    output_block.add(
        nn.Dropout(0.2 if use_se else 0.1),
        nn.Conv2D(1000, in_channels=last_conv_out_channel, kernel_size=1, strides=1,
                  padding=0, use_bias=True),
        nn.Flatten()
    )
    output_block_flops, output_block_model_size, output_data = get_block_flop(output_block, input_data)
    lookup_table['flops']['output_block'] = output_block_flops
    lookup_table['params']['output_block'] = output_block_model_size

    pp = pprint.PrettyPrinter(indent=4)
    pp.pprint(lookup_table)

    json_file = os.path.join(nas_root, 'models/lookup_table')
    if use_se:
        json_file += '_se'
    if last_conv_after_pooling:
        json_file += '_lastConvAfterPooling'
    json_file += '_' + channels_layout + '.json'
    with open(json_file, 'w') as fp:
        json.dump(lookup_table, fp, indent=4)
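# A sketch of consuming the table: given a sampled architecture (one block
# choice per layer) and channel-scale ids, total cost is read off by indexing
# lookup_table['flops']['nas_block'][layer][block_choice][scale_id]. The json
# path below is illustrative; the key structure follows the code above:
import json

with open('models/lookup_table_se_lastConvAfterPooling_OneShot.json') as fp:
    table = json.load(fp)

architecture = [0, 0, 0, 0, 0, 0, 1, 1, 2, 0, 1, 1, 0, 0, 1, 2, 2, 0, 2, 0]
scale_ids = [8, 6, 5, 7, 6, 7, 3, 4, 2, 4, 2, 3, 4, 3, 6, 7, 5, 3, 4, 6]
flops = table['flops']['input_block'] + table['flops']['output_block']
params = table['params']['input_block'] + table['params']['output_block']
for layer, (block_id, scale_id) in enumerate(zip(architecture, scale_ids)):
    flops += table['flops']['nas_block'][layer][block_id][scale_id]
    params += table['params']['nas_block'][layer][block_id][scale_id]
print('candidate flops: {}, params: {}'.format(flops, params))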
def __init__(self, cfg, cls_ch_squeeze, cls_ch_expand, multiplier=1., classes=1000,
             norm_kwargs=None, last_gamma=False, final_drop=0., use_global_stats=False,
             name_prefix='', norm_layer=BatchNorm):
    super(_MobileNetV3, self).__init__(prefix=name_prefix)
    norm_kwargs = norm_kwargs if norm_kwargs is not None else {}
    if use_global_stats:
        norm_kwargs['use_global_stats'] = True
    # initialize residual networks
    k = multiplier
    self.last_gamma = last_gamma
    self.norm_kwargs = norm_kwargs
    self.inplanes = 16

    with self.name_scope():
        self.features = nn.HybridSequential(prefix='')
        self.features.add(nn.Conv2D(channels=make_divisible(k * self.inplanes),
                                    kernel_size=3, padding=1, strides=2,
                                    use_bias=False, prefix='first-3x3-conv-conv2d_'))
        self.features.add(norm_layer(prefix='first-3x3-conv-batchnorm_', **norm_kwargs))
        self.features.add(HardSwish())
        i = 0
        for layer_cfg in cfg:
            layer = self._make_layer(
                kernel_size=layer_cfg[0],
                exp_ch=make_divisible(k * layer_cfg[1]),
                out_channel=make_divisible(k * layer_cfg[2]),
                use_se=layer_cfg[3],
                act_func=layer_cfg[4],
                stride=layer_cfg[5],
                prefix='seq-%d' % i,
            )
            self.features.add(layer)
            i += 1
        self.features.add(nn.Conv2D(channels=make_divisible(k * cls_ch_squeeze),
                                    kernel_size=1, padding=0, strides=1,
                                    use_bias=False, prefix='last-1x1-conv1-conv2d_'))
        self.features.add(norm_layer(prefix='last-1x1-conv1-batchnorm_', **norm_kwargs))
        self.features.add(HardSwish())
        self.features.add(nn.GlobalAvgPool2D())
        self.features.add(nn.Conv2D(channels=cls_ch_expand, kernel_size=1, padding=0,
                                    strides=1, use_bias=False, prefix='last-1x1-conv2-conv2d_'))
        self.features.add(HardSwish())
        if final_drop > 0:
            self.features.add(nn.Dropout(final_drop))
        self.output = nn.HybridSequential(prefix='output_')
        with self.output.name_scope():
            self.output.add(
                nn.Conv2D(in_channels=cls_ch_expand, channels=classes,
                          kernel_size=1, prefix='fc_'),
                nn.Flatten())
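# `make_divisible` is used throughout but not shown in this excerpt. A sketch
# following the conventional MobileNet reference helper (this repository's
# exact version may differ): it rounds a channel count to a multiple of
# `divisor` without shrinking it by more than 10%.
def make_divisible(v, divisor=8, min_value=None):
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    # ensure rounding down never removes more than 10% of the channels
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v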