def __init__(self, preact, num_joints, norm_layer=nn.BatchNorm, norm_kwargs=None, **kwargs):
    """Build the AlphaPose head.

    Parameters
    ----------
    preact : network
        Backbone network stored for later use.
    num_joints : int
        Number of output heatmap channels (one per keypoint).
    norm_layer : gluon block class
        Normalization layer type forwarded to the DUC stages.
    norm_kwargs : dict or None
        Extra keyword arguments for `norm_layer`; None means no extras.
    """
    super(AlphaPose, self).__init__(**kwargs)
    norm_kwargs = {} if norm_kwargs is None else norm_kwargs
    self.preact = preact
    self.num_joints = num_joints
    self.shuffle1 = PixelShuffle(2)
    self.duc1 = DUC(1024, inplanes=512, upscale_factor=2,
                    norm_layer=norm_layer, **norm_kwargs)
    self.duc2 = DUC(512, inplanes=256, upscale_factor=2,
                    norm_layer=norm_layer, **norm_kwargs)
    # Uniform init scaled by the fan-in of the 128-channel 3x3 conv.
    out_scale = math.sqrt(1 / (128 * 3 * 3))
    self.conv_out = nn.Conv2D(
        channels=num_joints, in_channels=128,
        kernel_size=3, strides=1, padding=1,
        weight_initializer=initializer.Uniform(scale=out_scale),
        bias_initializer=initializer.Uniform(scale=out_scale))
def make_layer(self, block, planes, blocks, stride=1, **kwargs):
    """Build one ResNet stage of `blocks` residual units.

    Parameters
    ----------
    block : type
        Residual unit class; `block.expansion` scales the output width.
    planes : int
        Base channel count of the stage.
    blocks : int
        Number of residual units in the stage.
    stride : int
        Stride of the first unit (2 halves the spatial resolution).

    Returns
    -------
    nn.HybridSequential
        The assembled stage. Also advances `self.inplanes` to the stage's
        output width as a side effect.
    """
    downsample = None
    # A projection shortcut is needed whenever the first unit changes the
    # spatial size (stride != 1) or the channel count.
    if stride != 1 or self.inplanes != planes * block.expansion:
        downsample = nn.HybridSequential()
        # NOTE(review): bias_initializer is specified although use_bias=False,
        # so it has no effect; kept as-is.
        downsample.add(nn.Conv2D(planes * block.expansion, in_channels=self.inplanes,
                                 kernel_size=1, strides=stride, use_bias=False,
                                 weight_initializer=initializer.Uniform(
                                     scale=math.sqrt(1 / (self.inplanes * 1 * 1))),
                                 bias_initializer=initializer.Uniform(
                                     scale=math.sqrt(1 / (self.inplanes * 1 * 1)))))
        downsample.add(self.norm_layer(gamma_initializer=ZeroUniform(), **kwargs))
    layers = nn.HybridSequential()
    # Only the first unit carries the stride/downsample; it also enables the
    # SE reduction branch when a projection shortcut exists.
    if downsample is not None:
        layers.add(block(self.inplanes, planes, stride, downsample,
                         reduction=True, norm_layer=self.norm_layer, **kwargs))
    else:
        layers.add(block(self.inplanes, planes, stride, downsample,
                         norm_layer=self.norm_layer, **kwargs))
    # Running input width for the remaining units and the next stage.
    self.inplanes = planes * block.expansion
    for _ in range(1, blocks):
        layers.add(block(self.inplanes, planes, norm_layer=self.norm_layer, **kwargs))
    return layers
def __init__(self, architecture, norm_layer=nn.BatchNorm, **kwargs):
    """Fast SE-ResNet backbone supporting the resnet50/resnet101 layouts.

    Parameters
    ----------
    architecture : str
        Architecture name; anything after the first '_' is ignored.
    norm_layer : gluon block class
        Normalization layer used throughout the network.
    """
    super(FastSEResNet, self).__init__()
    architecture = architecture.split('_')[0]
    assert architecture in ["resnet50", "resnet101"]
    self.inplanes = 64
    self.norm_layer = norm_layer
    # Stage depths differ only in the third stage (6 vs 23 units).
    stage3_units = {"resnet50": 6, "resnet101": 23}[architecture]
    self.layers = [3, 4, stage3_units, 3]
    self.block = Bottleneck
    # Stem: 7x7/2 conv with fan-in-scaled uniform init.
    stem_scale = math.sqrt(1 / (3 * 7 * 7))
    self.conv1 = nn.Conv2D(64, in_channels=3, kernel_size=7, strides=2,
                           padding=3, use_bias=False,
                           weight_initializer=initializer.Uniform(scale=stem_scale),
                           bias_initializer=initializer.Uniform(scale=stem_scale))
    self.bn1 = self.norm_layer(gamma_initializer=ZeroUniform(), **kwargs)
    self.relu = nn.Activation('relu')
    self.maxpool = nn.MaxPool2D(pool_size=3, strides=2, padding=1)
    # Four residual stages; stages 2-4 halve the spatial resolution.
    self.layer1 = self.make_layer(self.block, 64, self.layers[0], **kwargs)
    self.layer2 = self.make_layer(self.block, 128, self.layers[1], stride=2, **kwargs)
    self.layer3 = self.make_layer(self.block, 256, self.layers[2], stride=2, **kwargs)
    self.layer4 = self.make_layer(self.block, 512, self.layers[3], stride=2, **kwargs)
def __init__(self, planes, inplanes, upscale_factor=2, norm_layer=nn.BatchNorm, **kwargs):
    """Dense Upsampling Convolution block: conv -> norm -> ReLU -> pixel shuffle.

    Parameters
    ----------
    planes : int
        Output channels of the 3x3 conv (before pixel shuffle).
    inplanes : int
        Input channels.
    upscale_factor : int
        Spatial upscaling factor applied by the pixel shuffle.
    norm_layer : gluon block class
        Normalization layer type.
    """
    super(DUC, self).__init__()
    # Fan-in-scaled uniform init shared by the conv's initializers.
    scale = math.sqrt(1 / (inplanes * 3 * 3))
    with self.name_scope():
        self.conv = nn.Conv2D(planes, in_channels=inplanes, kernel_size=3,
                              padding=1, use_bias=False,
                              weight_initializer=initializer.Uniform(scale=scale),
                              bias_initializer=initializer.Uniform(scale=scale))
        self.bn = norm_layer(gamma_initializer=ZeroUniform(), **kwargs)
        self.relu = nn.Activation('relu')
        self.pixel_shuffle = PixelShuffle(upscale_factor)
def initialize(self, init=initializer.Uniform(), ctx=None, verbose=False, force_reinit=False):
    """Lazily build the 9-1-5 conv stack, then initialize its parameters.

    Bug fix: the original appended the three conv layers unconditionally on
    every call, so a second `initialize` (e.g. with `force_reinit=True`)
    silently duplicated the network. The layers are now built exactly once.
    """
    if not getattr(self, '_layers_built', False):
        self._layers_built = True
        self.add(nn.Conv2D(kernel_size=9, channels=64, activation='relu'))
        self.add(nn.Conv2D(kernel_size=1, channels=32, activation='relu'))
        self.add(nn.Conv2D(kernel_size=5, channels=3))
    return super().initialize(init=init, ctx=ctx, verbose=verbose, force_reinit=force_reinit)
def initialize(self, init=initializer.Uniform(), ctx=None, verbose=False, force_reinit=False):
    """Initialize parameters, skipping components that came pretrained.

    When `self.pretrained` is set, only the non-pretrained sub-blocks are
    initialized; otherwise the whole model is initialized normally.
    """
    if not self.pretrained:
        # No pretrained weights: initialize everything through the base class.
        super().initialize(init, ctx, verbose, force_reinit)
        return
    # Pretrained path: the RNN always needs fresh parameters; the second CNN
    # only when it was not itself pretrained.
    if not self.pretrained_cnn:
        self._cnn2.initialize(init, ctx)
    self._rnn.initialize(init, ctx)
def __init__(self, inplanes, planes, stride=1, downsample=None, reduction=False, norm_layer=nn.BatchNorm, **kwargs):
    """SE-ResNet bottleneck: 1x1 reduce -> 3x3 (strided) -> 1x1 expand x4,
    with an optional squeeze-and-excitation branch.

    Parameters
    ----------
    inplanes : int
        Input channels.
    planes : int
        Bottleneck width; the block outputs `planes * 4` channels.
    stride : int
        Stride of the middle 3x3 conv.
    downsample : gluon block or None
        Projection shortcut applied to the residual input, if any.
    reduction : bool
        Whether to attach an SELayer on the expanded output.
    norm_layer : gluon block class
        Normalization layer type.
    """
    super(Bottleneck, self).__init__()

    def _uniform(fan_in):
        # Fan-in-scaled uniform initializer (shared by weight and bias slots;
        # biases are disabled on these convs, so the bias one is inert).
        return initializer.Uniform(scale=math.sqrt(1 / fan_in))

    with self.name_scope():
        self.conv1 = nn.Conv2D(planes, in_channels=inplanes, kernel_size=1,
                               use_bias=False,
                               weight_initializer=_uniform(inplanes * 1 * 1),
                               bias_initializer=_uniform(inplanes * 1 * 1))
        self.bn1 = norm_layer(gamma_initializer=ZeroUniform(), **kwargs)
        self.conv2 = nn.Conv2D(planes, in_channels=planes, kernel_size=3,
                               strides=stride, padding=1, use_bias=False,
                               weight_initializer=_uniform(planes * 3 * 3),
                               bias_initializer=_uniform(planes * 3 * 3))
        self.bn2 = norm_layer(gamma_initializer=ZeroUniform(), **kwargs)
        self.conv3 = nn.Conv2D(planes * 4, in_channels=planes, kernel_size=1,
                               use_bias=False,
                               weight_initializer=_uniform(planes * 1 * 1),
                               bias_initializer=_uniform(planes * 1 * 1))
        self.bn3 = norm_layer(gamma_initializer=ZeroUniform(), **kwargs)
        if reduction:
            self.se = SELayer(planes * 4)
        self.reduc = reduction
        self.downsample = downsample
        self.stride = stride
def initialize(
    self,
    init=initializer.Uniform(),
    ctx=None,
    verbose=False,
    force_reinit=False,
):
    """Initialize all parameters with Xavier (uniform) regardless of `init`.

    NOTE(review): the `init` argument is accepted only for signature
    compatibility with the base class and is silently IGNORED — a
    Xavier(rnd_type="uniform") initializer is always used. This looks
    intentional, but confirm with callers before relying on `init`.
    """
    super(Model, self).initialize(
        init=initializer.Xavier(rnd_type="uniform"),
        ctx=ctx,
        verbose=verbose,
        force_reinit=force_reinit,
    )
def build_model(A, X):
    """Assemble, hybridize and initialize the full model.

    Cleanup: removed the unused local `in_units = X.shape[1]` from the
    original (it was never read).

    Parameters
    ----------
    A, X : inputs forwarded to `build_features` (adjacency and features,
        judging by the names — the shapes are consumed there, not here).

    Returns
    -------
    (model, features)
        The complete network and its feature sub-network, returned
        separately so callers can inspect intermediate representations.
    """
    model = nn.HybridSequential()
    with model.name_scope():
        # The feature extractor reports its output width for the classifier.
        features, out_units = build_features(A, X)
        model.add(features)
        classifier = LogisticRegressor(out_units)
        model.add(classifier)
    model.hybridize()
    model.initialize(initializer.Uniform(1))
    return model, features
def get_net(self):
    """Create and initialize a small fully-connected regression net (70 -> 30 -> 1).

    Cleanup: removed the unused local `drop_prob` and the commented-out
    experimental layers (extra Dense/BatchNorm/Dropout) from the original.

    Returns
    -------
    gluon.nn.Sequential
        The initialized network.
    """
    net = gluon.nn.Sequential()
    # name_scope gives every parameter a unique name, which makes
    # loading/saving the model straightforward.
    with net.name_scope():
        net.add(gluon.nn.Dense(70, activation='relu'))
        net.add(gluon.nn.Dense(30, activation='relu'))
        net.add(gluon.nn.Dense(1))  # single regression output
    net.initialize(init=initializer.Uniform())
    return net
def build_initializer(type, kerasDefaults, constant=0.):
    """Map a Keras-style initializer name to an MXNet initializer.

    Bug fix: the 'lecun_uniform' branch called `initializers.Xavier`
    (note the trailing 's'), an undefined name that raised NameError at
    runtime; it now uses `initializer.Xavier` like the other branches.
    An unknown `type` now raises ValueError instead of silently
    returning None.

    Parameters
    ----------
    type : str
        Initializer name ('constant', 'uniform', 'normal',
        'glorot_uniform', 'lecun_uniform', 'he_normal').
        NOTE: shadows the builtin, but the name is part of the public
        interface (callers may pass it by keyword), so it is kept.
    kerasDefaults : dict
        Supplies 'maxval_uniform' / 'stddev_normal' for the corresponding
        initializers.
    constant : float
        Fill value for the 'constant' initializer.

    Returns
    -------
    mxnet initializer instance.
    """
    if type == 'constant':
        return initializer.Constant(constant)
    if type == 'uniform':
        return initializer.Uniform(scale=kerasDefaults['maxval_uniform'])
    if type == 'normal':
        return initializer.Normal(sigma=kerasDefaults['stddev_normal'])
    if type == 'glorot_uniform':
        return initializer.Xavier(rnd_type='uniform', factor_type='avg', magnitude=3.)
    if type == 'lecun_uniform':
        # was: initializers.Xavier (NameError)
        return initializer.Xavier(rnd_type='uniform', factor_type='in', magnitude=3.)
    if type == 'he_normal':
        return initializer.Xavier(rnd_type='gaussian', factor_type='in', magnitude=2.)
    raise ValueError('Unsupported initializer type: %r' % (type,))
def load_params(self, inference, init=initializer.Uniform(), postfix='epoch'):
    """Load or initialize the network parameters according to the run config.

    Training mode resolves, in order of priority: pretrained features,
    an explicit .params file, a checkpoint resume, or fresh initialization.
    Evaluation mode always loads from `self.args.model_path`.

    :param inference: network whose parameters are loaded/initialized
    :param init: initializer used when no weights are loaded
    :param postfix: checkpoint filename postfix
    :return: None (the network is modified in place)
    """
    if self.args.training:
        if self.args.pretrained:
            # Warm start: reuse pretrained feature weights, freshly
            # initialize the embedding and output heads with Xavier.
            print('load the weights for features from path: %s' % self.args.model_path)
            inference.features.load_parameters(self.args.model_path, self.args.ctx,
                                               ignore_extra=True)
            print('initialize the weights for embeds and output')
            inference.embeds.initialize(
                init=initializer.Xavier(magnitude=2.24), ctx=self.args.ctx)
            inference.output.initialize(
                init=initializer.Xavier(magnitude=2.24), ctx=self.args.ctx)
        elif self.args.model_path.endswith('.params'):
            # Full-model weights provided explicitly.
            print('load the weights from path: %s' % self.args.model_path)
            inference.load_parameters(self.args.model_path, self.args.ctx)
        elif self.args.start_epoch > 0:
            # Resume from checkpoint.
            # NOTE(review): the filename hard-codes epoch 0 ('%04d' % 0)
            # regardless of start_epoch — verify this is intended.
            print('load the weights from path: %s' % os.path.join(
                self.args.ckpt,
                '%s-%s-%04d.params' % (self.args.bb, postfix, 0)))
            inference.load_parameters(
                os.path.join(
                    self.args.ckpt,
                    '%s-%s-%04d.params' % (self.args.bb, postfix, 0)),
                self.args.ctx)
        else:
            # Nothing to load: initialize from scratch.
            print('Initialize the weights')
            inference.initialize(init, ctx=self.args.ctx)
    else:
        # Evaluation: always load the trained weights.
        print('load the weights from path: %s' % self.args.model_path)
        inference.load_parameters(self.args.model_path, self.args.ctx)
def generate_initializer(init_dict):
    """Build an MXNet initializer from a config dict.

    Robustness fix: an unsupported 'type' now raises ValueError instead of
    silently falling through and returning None (which would surface later
    as a confusing error at initialization time).

    Parameters
    ----------
    init_dict : dict or None
        Expected keys: 'type' and 'init_config' (the per-type parameters).
        None selects the default Normal initializer.

    Returns
    -------
    mxnet initializer instance.
    """
    if init_dict is None:
        return init.Normal()
    init_type = init_dict['type']
    init_param = init_dict['init_config']
    # currently Uniform, Normal, Xavier, MSRAPrelu are supported
    if init_type == 'Uniform':
        scale = float(init_param['scale'])
        return init.Uniform(scale)
    if init_type == 'Normal':
        sigma = float(init_param['sigma'])
        return init.Normal(sigma)
    if init_type == 'Xavier':
        magnitude = float(init_param['magnitude'])
        return init.Xavier(magnitude=magnitude)
    if init_type == 'MSRAPrelu':
        slope = float(init_param['slope'])
        return init.MSRAPrelu(factor_type='avg', slope=slope)
    raise ValueError('Unsupported initializer type: %r' % (init_type,))
@use_np
@with_environment('MXNET_ENGINE_TYPE', 'NaiveEngine')
def test_18934_empty_leaky_relu():
    """Regression test for MXNet issue 18934: backward through leaky_relu on
    a zero-size array must not crash under the NaiveEngine."""
    arr = np.random.rand(0,2)  # empty (0-row) input
    arr_grad = np.empty_like(arr)
    autograd.mark_variables([arr], [arr_grad])
    with autograd.record():
        res = npx.leaky_relu(arr)
    res.backward()


# Regression test for MXNet issue 19118: exercised across every built-in
# initializer (by string alias and by instance) and both float dtypes.
@use_np
@pytest.mark.parametrize('initializer',[
    'zeros', 'ones', initializer.Constant(3),
    initializer.Uniform(),
    initializer.Normal(),
    initializer.Orthogonal(),
    initializer.Orthogonal(rand_type='normal'),
    initializer.Xavier(),
    initializer.Xavier(rnd_type='gaussian'),
    initializer.MSRAPrelu(),
    initializer.MSRAPrelu(factor_type='in'),
    initializer.MSRAPrelu(factor_type='out'),
    initializer.LSTMBias(),
])
@pytest.mark.parametrize('dtype', [
    'float32', 'float64'
])
def test_19118(initializer, dtype):
    # NOTE(review): the body appears truncated in this chunk; only the
    # layer construction is visible here.
    net = gluon.nn.Dense(16, in_units=16)
def __init__(self, supernet, train_set='imagenet', val_set=None,
             train_fn=default_train_fn, eval_fn=default_val_fn,
             post_epoch_fn=None, post_epoch_save=None, eval_split_pct=0.5,
             train_args={}, val_args={}, reward_fn=default_reward_fn,
             num_gpus=0, num_cpus=4, batch_size=256, epochs=120,
             warmup_epochs=5, controller_lr=1e-3, controller_type='lstm',
             controller_batch_size=10, ema_baseline_decay=0.95,
             update_arch_frequency=20, checkname='./enas/checkpoint.ag',
             plot_frequency=0, custom_batch_fn=None,
             tensorboard_log_dir=None, training_name='enas_training',
             **kwargs):
    """Set up ENAS training state: data/eval plumbing, the RL controller,
    its optimizer, and the async sampling machinery.

    NOTE(review): `train_args={}` and `val_args={}` are mutable default
    arguments shared across instances — safe only if never mutated;
    confirm before relying on them.
    NOTE(review): if `tensorboard_log_dir` is None the SummaryWriter line
    below (`self.tensorboard_log_dir + '/' + training_name`) will raise
    TypeError — callers appear expected to always pass a directory.
    """
    # Clamp requested CPU count to what the machine actually has.
    num_cpus = get_cpu_count() if num_cpus > get_cpu_count() else num_cpus
    # num_gpus may be an explicit list/tuple of device indices (validated
    # against the available GPUs) or a count (clamped to the available count).
    if (type(num_gpus) == tuple) or (type(num_gpus) == list):
        for gpu in num_gpus:
            if gpu >= get_gpu_count():
                raise ValueError('This gpu index does not exist (not enough gpus).')
    else:
        num_gpus = get_gpu_count() if num_gpus > get_gpu_count() else num_gpus
    self.supernet = supernet
    self.train_fn = train_fn
    self.eval_fn = eval_fn
    self.reward_fn = reward_fn
    self.post_epoch_fn = post_epoch_fn
    self.post_epoch_save = post_epoch_save
    self.eval_split_pct = eval_split_pct
    self.checkname = checkname
    self.plot_frequency = plot_frequency
    self.epochs = epochs
    self.warmup_epochs = warmup_epochs
    self.controller_batch_size = controller_batch_size
    self.tensorboard_log_dir = tensorboard_log_dir
    self.summary_writer = SummaryWriter(
        logdir=self.tensorboard_log_dir + '/' + training_name,
        flush_secs=5, verbose=False)
    self.config_images = {}
    # Architecture search space exposed by the supernet.
    kwspaces = self.supernet.kwspaces
    self.initialize_miscs(train_set, val_set, batch_size, num_cpus, num_gpus,
                          train_args, val_args, custom_batch_fn=custom_batch_fn)
    # create RL searcher/controller
    self.baseline = None  # moving-average reward baseline, filled in later
    self.ema_decay = ema_baseline_decay
    self.searcher = RLSearcher(
        kwspaces, controller_type=controller_type, prefetch=4,
        num_workers=4, softmax_temperature=5, tanh_constant=2.5)
    # controller setup
    self.controller = self.searcher.controller
    # Init controller params to the range used in the ENAS paper.
    self.controller.initialize(init=initializer.Uniform(0.1))
    self.controller_optimizer = mx.gluon.Trainer(
        self.controller.collect_params(), 'adam',
        optimizer_params={'learning_rate': controller_lr})
    self.update_arch_frequency = update_arch_frequency
    self.val_acc = 0
    self.eval_acc = 0
    # async controller sample: a small thread pool plus an indexed buffer
    # for out-of-order results.
    self._worker_pool = ThreadPool(2)
    self._data_buffer = {}
    self._rcvd_idx = 0
    self._sent_idx = 0
    self._timeout = 20
    # logging history
    self.training_history = []
    self._prefetch_controller()
def initialize(self, init=initializer.Uniform(), ctx=None, verbose=False, force_reinit=False):
    """Initialize only this block's own parameters (those whose names match
    `self.name`), leaving any shared/external parameters untouched."""
    own_params = self.collect_params(select=self.name)
    own_params.initialize(init, ctx, verbose, force_reinit)