def __init__(self, inp=256, mid=128, sz=5):
    """Convolutional LSTM cell: four gate convolution pairs plus
    lazily-shaped peephole parameters.

    Args:
        inp (int): input channel count.
        mid (int): hidden/cell channel count.
        sz (int): square kernel size; padding keeps spatial size.
    """
    super(ConvLSTM, self).__init__()
    pad = sz // 2
    with self.init_scope():
        # One (input-conv, hidden-conv) pair per gate; the hidden-side
        # convolutions are bias-free.
        self.wxi = L.Convolution2D(inp, mid, sz, pad=pad)
        self.whi = L.Convolution2D(mid, mid, sz, pad=pad, nobias=True)
        self.wxf = L.Convolution2D(inp, mid, sz, pad=pad)
        self.whf = L.Convolution2D(mid, mid, sz, pad=pad, nobias=True)
        self.wxc = L.Convolution2D(inp, mid, sz, pad=pad)
        self.whc = L.Convolution2D(mid, mid, sz, pad=pad, nobias=True)
        self.wxo = L.Convolution2D(inp, mid, sz, pad=pad)
        self.who = L.Convolution2D(mid, mid, sz, pad=pad, nobias=True)
        # Peephole weights, zero-initialised; shapes are fixed on first use.
        self.Wci = chainer.variable.Parameter(initializers.Zero())
        self.Wcf = chainer.variable.Parameter(initializers.Zero())
        self.Wco = chainer.variable.Parameter(initializers.Zero())
    self.inp = inp
    self.mid = mid
    self.pc = None  # previous cell state
    self.ph = None  # previous hidden state
def __init__(self, mid, sz):
    """ConvLSTM variant whose input transform is an Inception-style
    multi-scale convolution stack with channel/spatial attention.

    Args:
        mid (int): base channel count; the gate convolutions emit
            ``mid * 4`` channels (the four LSTM gates stacked).
        sz (int): kernel size for the main input/hidden convolutions.
    """
    super(newLSTM, self).__init__(
        # Inception-style input branch: (1,3,100,100) -> (1,mid*4,100,100)
        Wx_Inc_1 = L.Convolution2D(None, mid, ksize=1, pad=0),
        Wx_Inc_3 = L.Convolution2D(None, mid*2, ksize=3, pad=1),
        Wx_Inc_5 = L.Convolution2D(None, mid//2, ksize=5, pad=2),
        Wx_concat = L.Convolution2D(None, mid*4, ksize=1, pad=0),
        Wx_input = L.Convolution2D(None, mid*4, ksize=sz, pad=sz//2),
        channel_attention = ChannelAttention(mid),
        spatial_attention = SpatialAttention(),
        Wh1_Linear = L.Linear(None, 256),
        Wh2_Linear = L.Linear(None, 256),
        Wh1_conv = L.Convolution2D(None, 1, ksize=7, pad=3),
        # Hidden-to-gate convolution, bias-free:
        # (1,3,100,100) -> (1,mid*4,100,100)
        Wh_repeat = L.Convolution2D(None, mid*4, ksize=sz, pad=sz//2,
                                    nobias = True)
    )
    self.mid = mid  # 64 in the authors' configuration
    self.pc = None  # previous cell state
    self.ph = None  # previous hidden state
    with self.init_scope():
        # Peephole parameters, zero-initialised; shapes fixed lazily.
        Wci_initializer = initializers.Zero()
        self.Wci = variable.Parameter(Wci_initializer)
        Wcf_initializer = initializers.Zero()
        self.Wcf = variable.Parameter(Wcf_initializer)
        Wco_initializer = initializers.Zero()
        self.Wco = variable.Parameter(Wco_initializer)
def __init__(self, n_class=21):
    """Fully convolutional network head (FCN-16s layout: score_pool4 +
    upscore2 + upscore16) over a VGG16-shaped backbone.

    Args:
        n_class (int): number of output classes (default 21, PASCAL VOC).
    """
    self.n_class = n_class
    # All conv weights/biases start at zero; presumably the real weights
    # are copied in later from a pretrained VGG16 -- confirm with caller.
    initialW = initializers.Zero()
    initialb = initializers.Zero()
    # NOTE(review): super(self.__class__, ...) recurses infinitely if this
    # class is ever subclassed; prefer naming the class explicitly.
    super(self.__class__, self).__init__(
        # pad=100 on conv1_1 is the standard FCN trick so later crops can
        # recover the full input resolution.
        conv1_1=L.Convolution2D(3, 64, 3, stride=1, pad=100, initialW=initialW, initial_bias=initialb),
        conv1_2=L.Convolution2D(64, 64, 3, stride=1, pad=1, initialW=initialW, initial_bias=initialb),
        conv2_1=L.Convolution2D(64, 128, 3, stride=1, pad=1, initialW=initialW, initial_bias=initialb),
        conv2_2=L.Convolution2D(128, 128, 3, stride=1, pad=1, initialW=initialW, initial_bias=initialb),
        conv3_1=L.Convolution2D(128, 256, 3, stride=1, pad=1, initialW=initialW, initial_bias=initialb),
        conv3_2=L.Convolution2D(256, 256, 3, stride=1, pad=1, initialW=initialW, initial_bias=initialb),
        conv3_3=L.Convolution2D(256, 256, 3, stride=1, pad=1, initialW=initialW, initial_bias=initialb),
        conv4_1=L.Convolution2D(256, 512, 3, stride=1, pad=1, initialW=initialW, initial_bias=initialb),
        conv4_2=L.Convolution2D(512, 512, 3, stride=1, pad=1, initialW=initialW, initial_bias=initialb),
        conv4_3=L.Convolution2D(512, 512, 3, stride=1, pad=1, initialW=initialW, initial_bias=initialb),
        conv5_1=L.Convolution2D(512, 512, 3, stride=1, pad=1, initialW=initialW, initial_bias=initialb),
        conv5_2=L.Convolution2D(512, 512, 3, stride=1, pad=1, initialW=initialW, initial_bias=initialb),
        conv5_3=L.Convolution2D(512, 512, 3, stride=1, pad=1, initialW=initialW, initial_bias=initialb),
        # Fully-connected layers recast as convolutions (fc6: 7x7, fc7: 1x1).
        fc6=L.Convolution2D(512, 4096, 7, stride=1, pad=0, initialW=initialW, initial_bias=initialb),
        fc7=L.Convolution2D(4096, 4096, 1, stride=1, pad=0, initialW=initialW, initial_bias=initialb),
        score_fr=L.Convolution2D(4096, n_class, 1, stride=1, pad=0, initialW=initialW, initial_bias=initialb),
        score_pool4=L.Convolution2D(512, n_class, 1, stride=1, pad=0, initialW=initialW, initial_bias=initialb),
        upscore2=L.Deconvolution2D(n_class, n_class, 4, stride=2, nobias=True),
        upscore16=L.Deconvolution2D(n_class, n_class, 32, stride=16, nobias=True),
    )
    # Initialize the deconvolution layers with fixed bilinear upsampling
    # filters: zero everywhere except one filter per class on the diagonal.
    filt = utils.get_upsampling_filter(4)
    self.upscore2.W.data[...] = 0
    self.upscore2.W.data[range(n_class), range(n_class), :, :] = filt
    filt = utils.get_upsampling_filter(32)
    self.upscore16.W.data[...] = 0
    self.upscore16.W.data[range(n_class), range(n_class), :, :] = filt
def __init__(self, inplanes, gpu):
    """Hourglass module over a 3-D cost volume: two stride-2 3-D conv
    stages down, two stride-2 3-D deconv stages up.  Per the trailing
    ``# +conv2`` / ``# +x`` notes, conv5's output is added to conv2's
    and conv6's to the module input in the forward pass.

    Args:
        inplanes (int): channel count of the input volume.
        gpu: device id each sub-link is transferred to.
    """
    super(hourglass, self).__init__()
    self.gpu = gpu
    # NOTE(review): sub-links are plain attributes, not registered via
    # init_scope -- verify their parameters are tracked by this chain.
    self.conv1 = Sequential(
        convbn_3d(inplanes, inplanes * 2, kernel_size=3, stride=2, pad=1),
        F.relu).to_gpu(self.gpu)
    self.conv2 = convbn_3d(inplanes * 2, inplanes * 2, kernel_size=3,
                           stride=1, pad=1).to_gpu(self.gpu)
    self.conv3 = Sequential(
        convbn_3d(inplanes * 2, inplanes * 2, kernel_size=3, stride=2, pad=1),
        F.relu).to_gpu(self.gpu)
    self.conv4 = Sequential(
        convbn_3d(inplanes * 2, inplanes * 2, kernel_size=3, stride=1, pad=1),
        F.relu).to_gpu(self.gpu)
    self.conv5 = Sequential(
        L.DeconvolutionND(3, inplanes * 2, inplanes * 2, ksize=4, stride=2,
                          pad=1, nobias=True,
                          initialW=ini.Normal(math.sqrt(2. / 32))),
        L.BatchNormalization(inplanes * 2, eps=1e-5, decay=0.95,
                             initial_gamma=ini.One(),
                             initial_beta=ini.Zero())).to_gpu(
        self.gpu)  # +conv2
    self.conv6 = Sequential(
        L.DeconvolutionND(3, inplanes * 2, inplanes, ksize=4, stride=2,
                          pad=1, nobias=True),
        L.BatchNormalization(inplanes, eps=1e-5, decay=0.95,
                             initial_gamma=ini.One(),
                             initial_beta=ini.Zero())).to_gpu(
        self.gpu)  # +x
def __init__(self, bert):
    """Pre-training heads over a BERT encoder: masked-LM transform and
    next-sentence-prediction classifier.

    Args:
        bert: BERT model exposing ``config.hidden_size``,
            ``config.vocab_size`` and ``config.initializer_range``.
    """
    super(BertPretrainer, self).__init__()
    with self.init_scope():
        self.bert = bert
        # Dense transform applied to masked-token hidden states.
        self.masked_lm_dense = L.Linear(
            None, self.bert.config.hidden_size,
            initialW=initializers.Normal(
                scale=self.bert.config.initializer_range))
        self.activate = get_activation('gelu')
        # Per-vocabulary-entry output bias; presumably the projection
        # itself reuses the embedding matrix -- confirm in forward().
        self.mask_bias = variable.Parameter(
            initializers.Zero(), shape=self.bert.config.vocab_size)
        # Binary next-sentence classifier weights and bias.
        self.next_sentence_weights = variable.Parameter(
            initializers.Normal(scale=self.bert.config.initializer_range),
            shape=(2, self.bert.config.hidden_size))
        self.next_sentence_bias = variable.Parameter(
            initializers.Zero(), shape=2)
        self.layer_norm = LayerNormalization3D(None)
def __init__(self, inp=256, mid=128, sz=3):
    """Convolutional LSTM cell with peephole connections.

    Args:
        inp (int): input channel count.
        mid (int): hidden/cell channel count.
        sz (int): square kernel size; padding keeps spatial size.
    """
    pad = sz // 2
    gates = {}
    for gate in ('i', 'f', 'c', 'o'):
        # Input-to-gate conv (biased) and hidden-to-gate conv (bias-free).
        gates['Wx' + gate] = L.Convolution2D(inp, mid, sz, pad=pad)
        gates['Wh' + gate] = L.Convolution2D(mid, mid, sz, pad=pad,
                                             nobias=True)
    super(ConvLSTM, self).__init__(**gates)
    self.inp = inp
    self.mid = mid
    self.pc = None  # previous cell state
    self.ph = None  # previous hidden state
    with self.init_scope():
        # Peephole weights, zero-initialised; shapes are set lazily.
        self.Wci = variable.Parameter(initializers.Zero())
        self.Wcf = variable.Parameter(initializers.Zero())
        self.Wco = variable.Parameter(initializers.Zero())
def __init__(self, in_channels, out_channels, ksize=3, pad=1):
    """Convolutional LSTM cell with peephole connections.

    Args:
        in_channels (int): input channel count.
        out_channels (int): hidden/cell channel count.
        ksize (int): convolution kernel size.
        pad (int): convolution padding.
    """
    def x_conv():
        # Input-to-gate convolution (with bias).
        return L.Convolution2D(in_channels, out_channels,
                               ksize=ksize, pad=pad)

    def h_conv():
        # Hidden-to-gate convolution (bias-free).
        return L.Convolution2D(out_channels, out_channels,
                               ksize=ksize, pad=pad, nobias=True)

    super(ConvLSTM, self).__init__(
        Wxi=x_conv(), Whi=h_conv(),
        Wxf=x_conv(), Whf=h_conv(),
        Wxc=x_conv(), Whc=h_conv(),
        Wxo=x_conv(), Who=h_conv())
    self.in_channels = in_channels
    self.out_channels = out_channels
    self.pc = None  # previous cell state
    self.ph = None  # previous hidden state
    with self.init_scope():
        # Peephole weights, zero-initialised; shapes are set lazily.
        self.Wci = variable.Parameter(initializers.Zero())
        self.Wcf = variable.Parameter(initializers.Zero())
        self.Wco = variable.Parameter(initializers.Zero())
def __init__(self, channels):
    """Per-channel affine transform (scale W, shift b), initialised to
    the identity (W=1, b=0).

    Args:
        channels (int): number of channels the transform applies to.
    """
    super(AffineChannel2D, self).__init__()
    shape = (channels,)
    with self.init_scope():
        self.W = chainer.variable.Parameter(initializers.One(), shape)
        self.b = chainer.variable.Parameter(initializers.Zero(), shape)
def __init__(self, k=3, use_bn=True, residual=False):
    """PointNet transform net: predicts a k x k alignment matrix.

    Args:
        k (int): transform dimensionality (3 for input points, larger
            for feature alignment).
        use_bn (bool): use batch normalisation inside the conv blocks.
        residual (bool): use residual connections inside the conv blocks.
    """
    super(TransformModule, self).__init__()
    # fc6's bias starts as the flattened identity so the initial
    # predicted transform is the identity matrix.
    initial_bias = numpy.identity(k, dtype=numpy.float32).ravel()
    with self.init_scope():
        self.conv_block1 = ConvBlock(k, 64, ksize=1, use_bn=use_bn,
                                     residual=residual)
        self.conv_block2 = ConvBlock(64, 128, ksize=1, use_bn=use_bn,
                                     residual=residual)
        self.conv_block3 = ConvBlock(128, 1024, ksize=1, use_bn=use_bn,
                                     residual=residual)
        # [Note]
        # Original paper uses BN for fc layer as well.
        # https://github.com/charlesq34/pointnet/blob/master/models/transform_nets.py#L34
        # This chainer impl. skips BN for fc layers.
        self.fc4 = links.Linear(1024, 512)
        # self.bn4 = links.BatchNormalization(512)
        self.fc5 = links.Linear(512, 256)
        # self.bn5 = links.BatchNormalization(256)
        # initial output of transform net should be identity:
        # zero weights + identity bias.
        self.fc6 = links.Linear(
            256, k * k,
            initialW=initializers.Zero(dtype=numpy.float32),
            initial_bias=initial_bias)
    self.k = k
def __init__(self, node_in_size, weight_len):
    """Open-CRF layer holding a flat, zero-initialised weight vector.

    Args:
        node_in_size (int): node feature dimensionality (stored only).
        weight_len (int): length of the CRF weight vector ``W``.
    """
    super(OpenCRFLayer, self).__init__()
    self.node_in_size = node_in_size
    zeros = initializers.Zero(dtype=np.float32)
    with self.init_scope():
        self.W = chainer.Parameter(initializer=zeros, shape=(weight_len,))
def __init__(self, n_in, n_out, stride=1):
    """Residual-style module: two 3x3 convs, each followed by BN.

    Args:
        n_in (int): input channels of the first conv.
        n_out (int): output channels of both convs.
        stride (int): stride of the first conv (second conv is stride 1).
    """
    self.dtype = np.float32
    w = 1 / np.sqrt(2)
    initW = initializers.HeNormal(scale=w)
    initbias = initializers.Zero()
    # NOTE(review): the sixth positional argument `1` lands on `nobias`
    # in modern Chainer (truthy -> bias dropped, making `initial_bias`
    # dead) but on `wscale` in Chainer v1 -- confirm which Chainer
    # version/signature this targets.
    super(Module, self).__init__(
        conv1=L.Convolution2D(n_in, n_out, 3, stride, 1, 1,
                              initialW=initW, initial_bias=initbias),
        bn1=L.BatchNormalization(n_out, dtype=self.dtype),
        conv2=L.Convolution2D(n_out, n_out, 3, 1, 1, 1,
                              initialW=initW, initial_bias=initbias),
        bn2=L.BatchNormalization(n_out, dtype=self.dtype),
    )
def __init__(self, n_class, aspect_ratios, feature_channel, initialW=None, initial_bias=None):
    """Multibox head: per-feature-map localisation and confidence modules.

    Args:
        n_class (int): number of classes (including background).
        aspect_ratios: per-map aspect-ratio tuples.
        feature_channel: per-map input channel counts (zipped with
            ``aspect_ratios``).
        initialW: optional weight initializer (default LeCunUniform).
        initial_bias: optional bias initializer (default Zero).
    """
    self.n_class = n_class
    self.aspect_ratios = aspect_ratios
    self.feature_map_channel = feature_channel
    super(Multibox, self).__init__()
    with self.init_scope():
        self.loc = chainer.ChainList()
        self.conf = chainer.ChainList()
        if initialW is None:
            initialW = initializers.LeCunUniform()
        if initial_bias is None:
            initial_bias = initializers.Zero()
        # NOTE(review): `init` is built but never used below -- it looks
        # like it should be forwarded to MB_module; confirm against
        # MB_module's signature.
        init = {'initialW': initialW, 'initial_bias': initial_bias}
        for ar, f_channel in zip(aspect_ratios, feature_channel):
            #n = (len(ar) + 1) * 2
            n = len(ar) + 1  # priors per location: one per ratio, plus one
            self.loc.add_link(MB_module(f_channel, n * 4, 3))
            self.conf.add_link(MB_module(f_channel, n * self.n_class, 3))
def __init__(self, n_class, aspect_ratios, initialW=None, initial_bias=None):
    """Multibox head with one residual block per feature map, plus
    localisation and confidence convolutions.

    Args:
        n_class (int): number of classes (including background).
        aspect_ratios: per-map aspect-ratio tuples.
        initialW: optional weight initializer (default LeCunUniform).
        initial_bias: optional bias initializer (default Zero).
    """
    self.n_class = n_class
    self.aspect_ratios = aspect_ratios
    super(ResidualMultibox, self).__init__()
    if initialW is None:
        initialW = initializers.LeCunUniform()
    if initial_bias is None:
        initial_bias = initializers.Zero()
    init = {'initialW': initialW, 'initial_bias': initial_bias}
    with self.init_scope():
        self.res = chainer.ChainList()
        self.loc = chainer.ChainList()
        self.conf = chainer.ChainList()
        for ratios in aspect_ratios:
            n_prior = (len(ratios) + 1) * 2  # priors per location
            self.res.add_link(Residual(**init))
            # ksize=3, pad=1 keeps the spatial resolution of each map.
            self.loc.add_link(
                L.Convolution2D(n_prior * 4, 3, pad=1, **init))
            self.conf.add_link(
                L.Convolution2D(n_prior * self.n_class, 3, pad=1, **init))
def test_add_param(self):
    """add_param is deprecated (must warn) but still registers parameters.

    Covers: explicit shape with default and 'd' dtypes, deferred shape
    with later initialize(), and an explicit initializer.
    """
    # Shape given up front: parameter is initialised immediately.
    with testing.assert_warns(DeprecationWarning):
        self.link.add_param('z', (2, 3))
    self.check_param_init('z', (2, 3), 'f')
    with testing.assert_warns(DeprecationWarning):
        self.link.add_param('w', (2, 3), dtype='d')
    self.check_param_init('w', (2, 3), 'd')
    # No shape: parameter stays uninitialised until initialize().
    with testing.assert_warns(DeprecationWarning):
        self.link.add_param('r')
    self.check_param_uninit('r')
    self.link.r.initialize((2, 3))
    self.check_param_init('r', (2, 3), 'f')
    with testing.assert_warns(DeprecationWarning):
        self.link.add_param('s', dtype='d')
    self.check_param_uninit('s')
    self.link.s.initialize((2, 3))
    self.check_param_init('s', (2, 3), 'd')
    # Explicit initializer: dtype comes from the initializer ('d'),
    # values are zero after initialize().
    initializer = initializers.Zero('d')
    with testing.assert_warns(DeprecationWarning):
        self.link.add_param('t', initializer=initializer)
    self.check_param_uninit('t', initializer)
    self.link.t.initialize((2, 3))
    self.check_param_init('t', (2, 3), 'd', 0)
def __init__(self, n=5):
    """CIFAR-style ResNet with optional per-block weight reload.

    Residual blocks whose weights exist on disk as ``<name>.hdf5`` are
    loaded from file instead of freshly initialised.

    Args:
        n (int): number of residual units per stage.
    """
    super(ResNet, self).__init__()
    self.dtype = np.float32
    w = 1 / np.sqrt(2)
    initW = initializers.HeNormal(scale=w)
    initbias = initializers.Zero()
    # Names starting with '_' are parameter-free ops: kept only in the
    # forward list below, never registered as links.
    links = [('conv1', L.Convolution2D(3, 16, 3, 1, 1,
                                       initialW=initW,
                                       initial_bias=initbias)),
             ('bn2', L.BatchNormalization(16, dtype=self.dtype)),
             ('_relu3', F.ReLU()),
             ('res4', Block(16, 16, n)),
             ('res5', Block(16, 32, n, 2)),
             ('res6', Block(32, 64, n, 2)),
             ('_apool7', F.AveragePooling2D(8, 1, 0, False, True)),
             ('fc8', L.Linear(64, 10, initialW=initW,
                              initial_bias=initbias))]
    for i, link in enumerate(links):
        if 'res' in link[0] and os.path.isfile(link[0] + '.hdf5'):
            # Resume this residual block's weights from a previous run.
            self.add_link(*link)
            serializers.load_hdf5(link[0] + '.hdf5', getattr(self, link[0]))
        elif not link[0].startswith('_'):
            self.add_link(*link)
    # Ordered (name, callable) pairs consumed by the forward pass.
    self.forward = links
    self.train = True
def __init__(self, n_class, aspect_ratios, initialW=None, initial_bias=None):
    """Multibox head with three extension modules and an extended conv
    per feature map, plus the usual loc/conf convolutions.

    Args:
        n_class (int): number of classes (including background).
        aspect_ratios: per-map aspect-ratio tuples.
        initialW: optional weight initializer (default LeCunUniform).
        initial_bias: optional bias initializer (default Zero).
    """
    self.n_class = n_class
    self.aspect_ratios = aspect_ratios
    super(ExtendedMultibox, self).__init__()
    with self.init_scope():
        self.extconv = chainer.ChainList()
        self.loc = chainer.ChainList()
        self.conf = chainer.ChainList()
        self.ext = chainer.ChainList()
        if initialW is None:
            initialW = initializers.LeCunUniform()
        if initial_bias is None:
            initial_bias = initializers.Zero()
        init = {'initialW': initialW, 'initial_bias': initial_bias}
        # Only the middle module receives True as its flag; its meaning
        # is defined by ExtensionModule.
        for i in range(3):
            self.ext.add_link(ExtensionModule(i == 1, **init))
        for ar in aspect_ratios:
            n = (len(ar) + 1) * 2  # priors per location
            self.extconv.add_link(ExtendedConv(**init))
            self.loc.add_link(L.Convolution2D(n * 4, 3, pad=1, **init))
            self.conf.add_link(
                L.Convolution2D(n * self.n_class, 3, pad=1, **init))
def __init__(self, shape, glow_encoder):
    """Chain pairing a Glow encoder with a learnable bias parameter
    ``b`` (given shape, zero-init) and mask parameter ``m``
    (3x8x8, one-init).
    """
    super().__init__()
    self.encoder = glow_encoder
    zero_init = initializers.Zero()
    one_init = initializers.One()
    with self.init_scope():
        self.b = chainer.Parameter(zero_init, shape)
        self.m = chainer.Parameter(one_init, (3, 8, 8))
def __init__(self, n_class, aspect_ratios, initialW=None, initial_bias=None):
    """RefineDet-style head: anchor refinement convs (arm_*), transfer
    connection blocks (tcb), and object detection convs (odm_*).

    Args:
        n_class (int): number of classes for the ODM confidence head.
        aspect_ratios: per-level aspect-ratio tuples.
        initialW: optional weight initializer (default LeCunUniform).
        initial_bias: optional bias initializer (default Zero).
    """
    self.n_class = n_class
    self.aspect_ratios = aspect_ratios
    super(MultiboxWithTCB, self).__init__()
    with self.init_scope():
        self.arm_loc = chainer.ChainList()
        self.arm_conf = chainer.ChainList()
        self.tcb = chainer.ChainList()
        self.odm_loc = chainer.ChainList()
        self.odm_conf = chainer.ChainList()
        if initialW is None:
            initialW = initializers.LeCunUniform()
        if initial_bias is None:
            initial_bias = initializers.Zero()
        init = {'initialW': initialW, 'initial_bias': initial_bias}
        # Three intermediate transfer connections plus a terminal one.
        for i in range(3):
            self.tcb.add_link(TransferConnection(**init))
        self.tcb.add_link(TransferConnectionEnd(**init))
        for ar in aspect_ratios:
            n = (len(ar) + 1) * 2 - 1  # priors per location
            # arm_conf emits one score per prior; odm_conf one per
            # prior per class.
            self.arm_loc.add_link(L.Convolution2D(n * 4, 3, pad=1, **init))
            self.arm_conf.add_link(L.Convolution2D(n, 3, pad=1, **init))
            self.odm_loc.add_link(L.Convolution2D(n * 4, 3, pad=1, **init))
            self.odm_conf.add_link(
                L.Convolution2D(n * self.n_class, 3, pad=1, **init))
def create_initializer(init_type, scale=None, fillvalue=None):
    """Build a chainer initializer by name.

    Args:
        init_type (str): one of 'identity', 'constant', 'zero', 'one',
            'normal', 'glorotNormal', 'heNormal', 'orthogonal',
            'uniform', 'leCunUniform', 'glorotUniform', 'heUniform'.
        scale: optional scale for scaled initializers; when ``None`` the
            initializer's own default is used.
        fillvalue: fill value for the 'constant' initializer.

    Returns:
        A chainer initializer instance.

    Raises:
        ValueError: if ``init_type`` is not recognised.
    """
    if init_type == 'identity':
        return initializers.Identity() if scale is None else initializers.Identity(scale=scale)
    if init_type == 'constant':
        return initializers.Constant(fillvalue)
    if init_type == 'zero':
        return initializers.Zero()
    if init_type == 'one':
        return initializers.One()
    # Scaled initializers share one code path.  BUGFIX: the original
    # passed `scale` in BOTH branches for 'orthogonal', 'uniform',
    # 'leCunUniform', 'glorotUniform' and 'heUniform', so scale=None was
    # forwarded to the constructor instead of using its default.
    scaled = {
        'normal': initializers.Normal,
        'glorotNormal': initializers.GlorotNormal,
        'heNormal': initializers.HeNormal,
        'orthogonal': initializers.Orthogonal,
        'uniform': initializers.Uniform,
        'leCunUniform': initializers.LeCunUniform,
        'glorotUniform': initializers.GlorotUniform,
        'heUniform': initializers.HeUniform,
    }
    if init_type in scaled:
        cls = scaled[init_type]
        return cls() if scale is None else cls(scale)
    raise ValueError("Unknown initializer type: {0}".format(init_type))
def __init__(self, n_class, aspect_ratios, initialW=None, initial_bias=None):
    """Multibox-style head emitting location offsets plus a feature map
    of ``128`` channels per prior for each source level.

    Args:
        n_class (int): number of classes (stored for later use).
        aspect_ratios: per-level aspect-ratio tuples.
        initialW: optional weight initializer (default LeCunUniform).
        initial_bias: optional bias initializer (default Zero).
    """
    self.n_class = n_class
    self.aspect_ratios = aspect_ratios
    self._input_multiplier = 128
    super().__init__()
    if initialW is None:
        initialW = initializers.LeCunUniform()
    if initial_bias is None:
        initial_bias = initializers.Zero()
    init = {'initialW': initialW, 'initial_bias': initial_bias}
    with self.init_scope():
        self.loc = chainer.ChainList()
        self.features = chainer.ChainList()
        for ratios in aspect_ratios:
            n_prior = (len(ratios) + 1) * 2  # priors per location
            self.loc.add_link(
                L.Convolution2D(n_prior * 4, 3, pad=1, **init))
            self.features.add_link(
                L.Convolution2D(n_prior * self._input_multiplier, 3,
                                pad=1, **init))
def __init__(self, size, decay=0.9, eps=2e-5, dtype=numpy.float32,
             use_gamma=True, use_beta=True,
             initial_gamma=None, initial_beta=None):
    """Batch-normalisation link with persistent running statistics.

    Args:
        size (int): number of channels/features to normalise.
        decay (float): decay used for the running mean/variance.
        eps (float): numerical-stability epsilon.
        dtype: dtype for parameters and statistics.
        use_gamma (bool): include the scale parameter ``gamma``.
        use_beta (bool): include the shift parameter ``beta``.
        initial_gamma: optional gamma initializer (default One).
        initial_beta: optional beta initializer (default Zero).
    """
    super(BatchNormalization, self).__init__()
    if use_gamma:
        # NOTE(review): add_param registers 'gamma', then the plain
        # assignment below replaces it with a fresh Parameter outside
        # init_scope -- double registration; verify which object the
        # link actually tracks/serialises.
        self.add_param('gamma', size, dtype=dtype)
        if initial_gamma is None:
            initial_gamma = initializers.One()
        #initializers.init_weight(self.gamma.data, initial_gamma)
        self.gamma = chainer.Parameter(initial_gamma, (size, ))
    if use_beta:
        self.add_param('beta', size, dtype=dtype)
        if initial_beta is None:
            initial_beta = initializers.Zero()
        #initializers.init_weight(self.beta.data, initial_beta)
        self.beta = chainer.Parameter(initial_beta, (size, ))
    # Running statistics: saved/loaded with the link but not trained.
    self.add_persistent('avg_mean', numpy.zeros(size, dtype=dtype))
    self.add_persistent('avg_var', numpy.zeros(size, dtype=dtype))
    self.add_persistent('N', 0)
    self.decay = decay
    self.eps = eps
def __init__(self, n_class, aspect_ratios, initialW=None, initial_bias=None):
    """Multibox head storing per-level convolutions as named attributes
    (``loc_0``.., ``conf_0``..) rather than ChainLists.

    Args:
        n_class (int): number of classes (including background).
        aspect_ratios: per-level aspect-ratio tuples.
        initialW: optional weight initializer (default LeCunUniform).
        initial_bias: optional bias initializer (default Zero).
    """
    super(Multibox, self).__init__()
    self.n_class = n_class
    self.aspect_ratios = aspect_ratios
    if initialW is None:
        initialW = initializers.LeCunUniform()
    if initial_bias is None:
        initial_bias = initializers.Zero()
    init = {'initialW': initialW, 'initial_bias': initial_bias}
    with self.init_scope():
        # with chainer.using_config('dtype', 'float32'):
        for i, ar in enumerate(aspect_ratios):
            n = (len(ar) + 1) * 2  # priors per location
            loc_name = 'loc_{}'.format(i)
            conf_name = 'conf_{}'.format(i)
            setattr(self, loc_name,
                    L.Convolution2D(n * 4, 3, pad=1, **init))
            setattr(self, conf_name,
                    L.Convolution2D(n * self.n_class, 3, pad=1, **init))
    # Post-processing hooks kept as attributes so callers can swap them.
    self.concat_locs = lambda xs: F.concat(xs, axis=1)
    self.concat_confs = lambda xs: F.concat(xs, axis=1)
    self.post_loc = lambda x: post_loc(x)
    self.post_conf = lambda x: post_conf(x, self.n_class)
def test_copydata_from_uninitialized_parameter(self):
    """copydata from an uninitialized Parameter must initialize it and
    copy its (zero) contents."""
    src = chainer.Parameter(initializers.Zero())
    dst = chainer.Parameter(self.a)
    dst.copydata(src)
    # The copy forces both parameters to materialise as ndarrays with
    # identical contents.
    self.assertIsInstance(dst.data, np.ndarray)
    self.assertIsInstance(src.data, np.ndarray)
    np.testing.assert_array_equal(dst.data, src.data)
def __init__(self):
    """VGG16 extractor tail for 320x320 input: the two extra conv6
    layers (LeCun-uniform weights, zero bias)."""
    super(VGG16Extractor320, self).__init__()
    lecun = initializers.LeCunUniform()
    zero = initializers.Zero()
    with self.init_scope():
        self.conv6_1 = L.Convolution2D(
            256, 1, initialW=lecun, initial_bias=zero)
        # Stride-2 conv halves the spatial resolution.
        self.conv6_2 = L.Convolution2D(
            512, 3, stride=2, pad=1, initialW=lecun, initial_bias=zero)
def _initialize_params(self):
    """Create and initialise the LSTM bias vector.

    A single flat parameter ``b`` stores all four gate biases
    (a, i, f, o) concatenated; the forget gate gets its own initializer
    (``forget_bias_init``) while the other gates share ``bias_init``.
    """
    bias_initializer = initializers.Zero()
    self.add_param('b', self.state_size*4, initializer=bias_initializer)
    # Views onto the four gate slices of the flat bias vector.
    a, i, f, o = lstm._extract_gates(
        self.b.data.reshape(1, 4 * self.state_size, 1))
    initializers.init_weight(a, self.bias_init)
    initializers.init_weight(i, self.bias_init)
    initializers.init_weight(f, self.forget_bias_init)
    initializers.init_weight(o, self.bias_init)
def convbn_3d(in_planes, out_planes, kernel_size, stride, pad):
    """Return a Sequential of bias-free 3-D convolution followed by
    batch normalisation.

    The conv weights use a He-style normal init scaled by the kernel
    volume times the output channel count.
    """
    fan = kernel_size ** 3 * out_planes
    conv = L.ConvolutionND(
        3, in_planes, out_planes, ksize=kernel_size, stride=stride,
        pad=pad, nobias=True, initialW=ini.Normal(math.sqrt(2. / fan)))
    bn = L.BatchNormalization(
        out_planes, eps=1e-5, decay=0.95,
        initial_gamma=ini.One(), initial_beta=ini.Zero())
    return Sequential(conv, bn)
def __init__(self, in_channels, out_channels, ksize=None, stride=1, pad=0,
             dilate=1, groups=1, nobias=True, initialW=None,
             initial_bias=None, use_scale=True, activ=F.relu):
    """ CTOR.

    Conv block in the Fixup-initialisation style -- presumed from the
    naming; confirm against forward(): a Convolution2D bracketed by
    scalar bias parameters (bias_in / bias_out) and an optional scalar
    scale, with an activation applied at the end.
    """
    super(FixupConv2D, self).__init__()
    if initialW is None:
        # NOTE: update it to zero initializer
        initialW = I.Zero()
    with self.init_scope():
        self.conv = L.Convolution2D(
            in_channels, out_channels, ksize=ksize, stride=stride,
            pad=pad, nobias=nobias, initialW=initialW,
            initial_bias=initial_bias, dilate=dilate, groups=groups)
        # bias term for conv input and output (scalar each, zero-init)
        self.bias_in = chainer.Parameter(initializer=I.Zero(), shape=1)
        self.bias_out = chainer.Parameter(initializer=I.Zero(), shape=1)
        # NOTE: activ controls whether to use scale as well -- scale is
        # created when requested explicitly OR when there is no
        # activation.
        if use_scale or activ is None:
            self.scale = chainer.Parameter(initializer=I.One(), shape=1)
        else:
            self.scale = None
    # activation
    self.activ = activ
def __init__(self, embeddings, n_labels, n_blstm_layers=3,
             lstm_hidden_size=400, use_gru=False, n_arc_mlp_layers=1,
             n_arc_mlp_units=500, n_label_mlp_layers=1,
             n_label_mlp_units=100, mlp_activation=F.leaky_relu,
             embeddings_dropout=0.33, lstm_dropout=0.33,
             arc_mlp_dropout=0.33, label_mlp_dropout=0.33):
    """Deep biaffine dependency parser: embeddings -> BiLSTM/BiGRU ->
    arc/label MLPs -> biaffine scorers.

    Args:
        embeddings: embedding tables forwarded to ``Embed``.
        n_labels (int): number of dependency relation labels.
        n_blstm_layers (int): recurrent layers in the encoder.
        lstm_hidden_size (int): encoder hidden size; falls back to the
            embedding size when None.
        use_gru (bool): use BiGRU instead of BiLSTM.
        n_arc_mlp_layers / n_arc_mlp_units: arc MLP depth/width.
        n_label_mlp_layers / n_label_mlp_units: label MLP depth/width.
        mlp_activation: activation for all MLP layers.
        *_dropout (float): dropout ratios for the respective components.
    """
    super(DeepBiaffine, self).__init__()
    blstm_cls = BiGRU if use_gru else BiLSTM
    with self.init_scope():
        orthonormal_initializer = Orthonormal()
        zero_initializer = initializers.Zero()
        self.embed = Embed(*embeddings, dropout=embeddings_dropout)
        # Encoder input size excludes the width of the first embedding
        # table -- presumably it is handled separately inside Embed;
        # confirm against Embed's forward.
        embed_size = self.embed.size - self.embed[0].W.shape[1]
        self.blstm = blstm_cls(
            n_layers=n_blstm_layers,
            in_size=embed_size,
            out_size=(lstm_hidden_size if lstm_hidden_size is not None
                      else embed_size),
            dropout=lstm_dropout,
            initialW=orthonormal_initializer
        )
        # Separate head/dependent MLPs for arcs and labels.
        self.mlp_arc_head = MLP([MLP.Layer(None, n_arc_mlp_units,
                                           mlp_activation, arc_mlp_dropout,
                                           initialW=orthonormal_initializer)
                                 for i in range(n_arc_mlp_layers)])
        self.mlp_arc_dep = MLP([MLP.Layer(None, n_arc_mlp_units,
                                          mlp_activation, arc_mlp_dropout,
                                          initialW=orthonormal_initializer)
                                for i in range(n_arc_mlp_layers)])
        self.mlp_label_head = MLP([MLP.Layer(None, n_label_mlp_units,
                                             mlp_activation,
                                             label_mlp_dropout,
                                             initialW=orthonormal_initializer)
                                   for i in range(n_label_mlp_layers)])
        self.mlp_label_dep = MLP([MLP.Layer(None, n_label_mlp_units,
                                            mlp_activation,
                                            label_mlp_dropout,
                                            initialW=orthonormal_initializer)
                                  for i in range(n_label_mlp_layers)])
        # Biaffine scorers start at zero.
        self.arc_biaffine = \
            Biaffine(n_arc_mlp_units, n_arc_mlp_units, 1,
                     nobias=(False, True, True),
                     initialW=zero_initializer)
        self.label_biaffine = \
            Biaffine(n_label_mlp_units, n_label_mlp_units, n_labels,
                     nobias=(False, False, False),
                     initialW=zero_initializer)
def __init__(self):
    """GoogLeNet (Inception-BN variant) built entirely in float16, with
    the two auxiliary classifier branches (outa / outb).
    """
    self.dtype = dtype = np.float16
    W = initializers.HeNormal(1 / np.sqrt(2), self.dtype)
    bias = initializers.Zero(self.dtype)
    chainer.Chain.__init__(self)
    with self.init_scope():
        # Stem: conv -> BN twice (pooling happens in the forward pass).
        self.conv1 = L.Convolution2D(
            None, 64, 7, stride=2, pad=3, initialW=W, nobias=True)
        self.norm1 = L.BatchNormalization(64, dtype=dtype)
        self.conv2 = L.Convolution2D(
            None, 192, 3, pad=1, initialW=W, nobias=True)
        self.norm2 = L.BatchNormalization(192, dtype=dtype)
        # Inception-BN tower.
        self.inc3a = L.InceptionBN(
            None, 64, 64, 64, 64, 96, 'avg', 32, conv_init=W, dtype=dtype)
        self.inc3b = L.InceptionBN(
            None, 64, 64, 96, 64, 96, 'avg', 64, conv_init=W, dtype=dtype)
        self.inc3c = L.InceptionBN(
            None, 0, 128, 160, 64, 96, 'max', stride=2, conv_init=W,
            dtype=dtype)
        self.inc4a = L.InceptionBN(
            None, 224, 64, 96, 96, 128, 'avg', 128, conv_init=W,
            dtype=dtype)
        self.inc4b = L.InceptionBN(
            None, 192, 96, 128, 96, 128, 'avg', 128, conv_init=W,
            dtype=dtype)
        self.inc4c = L.InceptionBN(
            None, 128, 128, 160, 128, 160, 'avg', 128, conv_init=W,
            dtype=dtype)
        self.inc4d = L.InceptionBN(
            None, 64, 128, 192, 160, 192, 'avg', 128, conv_init=W,
            dtype=dtype)
        self.inc4e = L.InceptionBN(
            None, 0, 128, 192, 192, 256, 'max', stride=2, conv_init=W,
            dtype=dtype)
        self.inc5a = L.InceptionBN(
            None, 352, 192, 320, 160, 224, 'avg', 128, conv_init=W,
            dtype=dtype)
        self.inc5b = L.InceptionBN(
            None, 352, 192, 320, 192, 224, 'max', 128, conv_init=W,
            dtype=dtype)
        # NOTE(review): `bias=` here receives an initializer; modern
        # Chainer's Linear uses `initial_bias` -- confirm the targeted
        # Chainer version accepts this keyword.
        self.out = L.Linear(None, 1000, initialW=W, bias=bias)
        # Auxiliary classifier head a (mid-network).
        self.conva = L.Convolution2D(None, 128, 1, initialW=W, nobias=True)
        self.norma = L.BatchNormalization(128, dtype=dtype)
        self.lina = L.Linear(None, 1024, initialW=W, nobias=True)
        self.norma2 = L.BatchNormalization(1024, dtype=dtype)
        self.outa = L.Linear(None, 1000, initialW=W, bias=bias)
        # Auxiliary classifier head b.
        self.convb = L.Convolution2D(None, 128, 1, initialW=W, nobias=True)
        self.normb = L.BatchNormalization(128, dtype=dtype)
        self.linb = L.Linear(None, 1024, initialW=W, nobias=True)
        self.normb2 = L.BatchNormalization(1024, dtype=dtype)
        self.outb = L.Linear(None, 1000, initialW=W, bias=bias)
def __init__(self, dtype=numpy.float32):
    """Tiny conv+linear network used for dtype-parameterised tests.

    Args:
        dtype: parameter dtype for both layers (default float32).
    """
    super(SimpleNet, self).__init__()
    self.dtype = dtype
    he = initializers.HeNormal(1 / numpy.sqrt(2), self.dtype)
    zero = initializers.Zero(self.dtype)
    with self.init_scope():
        self.conv = chainer.links.Convolution2D(
            2, 2, 3, initialW=he, initial_bias=zero)
        self.fc = chainer.links.Linear(
            18, 2, initialW=he, initial_bias=zero)
    self.train = True