Example #1
class Upsampler(Chain):
    def __init__(self, channels, ksize, repeat):
        super().__init__()
        self.channels = channels
        self.ksize = ksize
        self.repeat = repeat
        with self.init_scope():
            self.convs = Sequential(
                Convolution2D(channels * 2,
                              channels * 2,
                              ksize=ksize,
                              stride=1,
                              pad=0), relu).repeat(repeat)
            self.convs.append(
                Convolution2D(channels * 2, channels, ksize=1, stride=1,
                              pad=0))

    def __call__(self, x, y):
        x_height, x_width = x.shape[2:4]
        z_height, z_width = x_height * 2, x_width * 2
        z = resize_images(x, (z_height, z_width), mode="nearest")
        y_height, y_width = y.shape[2:4]
        height, width = min(x_height, y_height), min(x_width, y_width)
        z_width_sub = (z_width - width) // 2
        z_height_sub = (z_height - height) // 2
        y_width_sub = (y_width - width) // 2
        y_height_sub = (y_height - height) // 2
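        # Note: the negative-index crops below assume nonzero margins;
        # a slice like z[:, :, 0:-0, :] would be empty.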
        zs = z[:, :, z_height_sub:-z_height_sub, z_width_sub:-z_width_sub]
        ys = y[:, :, y_height_sub:-y_height_sub, y_width_sub:-y_width_sub]
        return self.convs(concat((zs, ys), axis=1))
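A minimal usage sketch (not from the original project), assuming the same Chainer imports the class relies on (Chain, Sequential, Convolution2D, relu, resize_images, concat) plus NumPy:

import numpy as np

up = Upsampler(channels=64, ksize=3, repeat=2)
x = np.zeros((1, 64, 16, 16), dtype=np.float32)   # low-resolution features
y = np.zeros((1, 64, 32, 32), dtype=np.float32)   # higher-resolution features
# x is doubled to 32x32, both tensors are center-cropped to the common
# 16x16 region, concatenated along channels, and run through self.convs.
z = up(x, y)   # (1, 64, 12, 12) after two unpadded 3x3 convs and the final 1x1 conv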
Example #2
def _get_model(layers, comm, predict=False):
    model = Sequential()
    W = chainer.initializers.HeNormal(1 / np.sqrt(1 / 2), dtype=np.float32)
    bias = chainer.initializers.Zero(dtype=np.float32)

    for layer in layers:
        name = layer['name']
        parameter = copy.deepcopy(layer['parameter'])
        if name.split('.')[0] == 'GC':
            parameter.update({'predict': predict})
            if 'batch_norm' in parameter.keys() and parameter['batch_norm']:
                parameter.update({'comm': comm})
            if 'activation' in parameter.keys() and parameter['activation']:
                parameter['activation'] = eval(parameter['activation'])
            add_layer = eval(name)(**parameter)
        elif name.split('.')[0] == 'L':
            if 'Linear' in name.split('.')[1]:
                parameter.update({'initialW': W, 'initial_bias': bias})
            add_layer = eval(name)(**parameter)
        elif name.split('.')[0] == 'F':
            if len(parameter) == 0:
                add_layer = partial(eval(name))
            else:
                add_layer = partial(eval(name), **parameter)
        elif name.split('.')[0] == 'MNL':
            if predict:
                add_layer = L.BatchNormalization(size=parameter['size'])
            else:
                add_layer = MNL.MultiNodeBatchNormalization(
                    size=parameter['size'], comm=comm)
        elif name == 'Flat':
            add_layer = partial(lambda *x: x[0])
        model.append(add_layer)
    return model
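A hedged sketch of the layer spec this builder consumes; the entries below are illustrative, not taken from the project, and assume chainer.links as L, chainer.functions as F, numpy as np, copy, and functools.partial are imported as the function requires:

layers = [
    {'name': 'L.Linear', 'parameter': {'in_size': None, 'out_size': 128}},
    {'name': 'F.relu', 'parameter': {}},
    {'name': 'L.Linear', 'parameter': {'in_size': 128, 'out_size': 10}},
]
model = _get_model(layers, comm=None)   # comm is only consulted by GC/MNL layers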
Example #3
    def __init__(self, inplanes, gpu):
        super(hourglass, self).__init__()
        self.gpu = gpu

        self.conv1 = Sequential(
            convbn_3d(inplanes, inplanes * 2, kernel_size=3, stride=2, pad=1),
            F.relu).to_gpu(self.gpu)

        self.conv2 = convbn_3d(inplanes * 2,
                               inplanes * 2,
                               kernel_size=3,
                               stride=1,
                               pad=1).to_gpu(self.gpu)

        self.conv3 = Sequential(
            convbn_3d(inplanes * 2,
                      inplanes * 2,
                      kernel_size=3,
                      stride=2,
                      pad=1), F.relu).to_gpu(self.gpu)

        self.conv4 = Sequential(
            convbn_3d(inplanes * 2,
                      inplanes * 2,
                      kernel_size=3,
                      stride=1,
                      pad=1), F.relu).to_gpu(self.gpu)

        self.conv5 = Sequential(
            L.DeconvolutionND(3,
                              inplanes * 2,
                              inplanes * 2,
                              ksize=4,
                              stride=2,
                              pad=1,
                              nobias=True,
                              initialW=ini.Normal(math.sqrt(2. / 32))),
            L.BatchNormalization(inplanes * 2,
                                 eps=1e-5,
                                 decay=0.95,
                                 initial_gamma=ini.One(),
                                 initial_beta=ini.Zero())).to_gpu(
                                     self.gpu)  # +conv2

        self.conv6 = Sequential(
            L.DeconvolutionND(3,
                              inplanes * 2,
                              inplanes,
                              ksize=4,
                              stride=2,
                              pad=1,
                              nobias=True),
            L.BatchNormalization(inplanes,
                                 eps=1e-5,
                                 decay=0.95,
                                 initial_gamma=ini.One(),
                                 initial_beta=ini.Zero())).to_gpu(
                                     self.gpu)  # +x
Example #4
 def __init__(self, hidden_units):
     print(hidden_units)
     super(ShallowConcateS, self).__init__()
     with self.init_scope():
         self.layers = Sequential()
         for layer_units in hidden_units:
             self.layers.append(L.Linear(None, layer_units))
             self.layers.append(F.relu)
             self.layers.append(L.BatchNormalization(layer_units))
             self.layers.append(F.dropout)
         self.last = L.Linear(None, 2)
     print(self.layers)
Example #5
 def __init__(self, channels, ksize, repeat):
     super().__init__()
     self.channels = channels
     self.ksize = ksize
     self.repeat = repeat
     with self.init_scope():
         self.convs = Sequential(
             Convolution2D(channels * 2,
                           channels * 2,
                           ksize=ksize,
                           stride=1,
                           pad=0), relu).repeat(repeat)
         self.convs.append(
             Convolution2D(channels * 2, channels, ksize=1, stride=1,
                           pad=0))
Example #6
	def __init__(self, levels=7, first_channels=16, last_channels=512, categories=1, depth=8, group_size=None):
		super().__init__()
		in_channels = [first_channels] * (levels - 1)
		out_channels = [last_channels] * (levels - 1)
		for i in range(1, levels - 1):
			channels = min(first_channels * 2 ** i, last_channels)
			in_channels[i] = channels
			out_channels[i - 1] = channels
		with self.init_scope():
			self.main = Sequential(
				FromRGB(first_channels),
				Sequential(*[ResidualBlock(i, o) for i, o in zip(in_channels, out_channels)]),
				OutputBlock(last_channels, categories > 1, group_size))
			if categories > 1:
				self.embedder = EqualizedLinear(categories, last_channels, gain=1)
				self.mapper = Sequential(EqualizedLinear(last_channels, last_channels), LeakyRelu()).repeat(depth)
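With the defaults above (levels=7, first_channels=16, last_channels=512), the loop produces the following channel ladder (a worked example of the computation, not code from the source):

# in_channels  = [16, 32, 64, 128, 256, 512]
# out_channels = [32, 64, 128, 256, 512, 512]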
Example #7
def fully_connected(input_dim, output_dim, with_bn=True):
    bn = L.BatchNormalization(output_dim) if with_bn else F.identity
    return Sequential(
        L.Linear(input_dim, output_dim),
        bn,
        F.relu,
    )
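A small assumed usage sketch (not from the source), stacking these blocks into an MLP with Sequential and L from Chainer:

mlp = Sequential(
    fully_connected(784, 256),
    fully_connected(256, 64),
    L.Linear(64, 10),   # raw logits: no BN or ReLU on the output layer
)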
Example #8
    def __init__(self, channels: int, num_classes: int, layers: int, auxiliary, genotype):
        super(NetworkCIFAR, self).__init__()
        self._layers = layers
        self._auxiliary = auxiliary

        stem_multiplier = 3
        curr_ch = stem_multiplier * channels
        self.stem = Sequential(
            links.Convolution2D(3, curr_ch, 3, pad=1, nobias=True),
            links.BatchNormalization(curr_ch)
        )

        pp_ch, p_ch, curr_ch = curr_ch, curr_ch, channels
        self.cells = chainer.ChainList()
        reduction_prev = False
        ch_to_auxiliary: int = -1
        for i in range(layers):
            if i in [layers // 3, 2 * layers // 3]:
                curr_ch *= 2
                reduction = True
            else:
                reduction = False
            cell = Cell(genotype, pp_ch, p_ch, curr_ch, reduction, reduction_prev)
            reduction_prev = reduction
            self.cells.add_link(cell)
            pp_ch, p_ch = p_ch, cell.multiplier * curr_ch
            if i == 2 * layers // 3:
                ch_to_auxiliary = p_ch
        assert ch_to_auxiliary != -1
        if auxiliary:
            self.auxiliary_head = AuxiliaryHeadCIFAR(ch_to_auxiliary, num_classes)
        self.classifier = links.Linear(p_ch, num_classes)
Example #9
 def __init__(self,
              in_channels,
              out_channels,
              k_size,
              stride,
              padding,
              dilation,
              affine=True):
     super(DilConv, self).__init__()
     with self.init_scope():
         self.op = Sequential(
             func.relu,
             links.Convolution2D(in_channels,
                                 out_channels,
                                 ksize=k_size,
                                 stride=stride,
                                 pad=padding,
                                 dilate=dilation,
                                 groups=in_channels,
                                 nobias=True),
             links.Convolution2D(in_channels,
                                 out_channels,
                                 ksize=1,
                                 pad=0,
                                 nobias=True),
             links.BatchNormalization(out_channels,
                                      use_gamma=affine,
                                      use_beta=affine),
         )
Example #10
 def __init__(self, inplanes, planes, stride=1):
     super(BasicBlock, self).__init__()
     with self.init_scope():
         self.conv1 = L.Convolution2D(inplanes,
                                      planes,
                                      ksize=3,
                                      stride=stride,
                                      pad=1,
                                      nobias=True)
         self.bn1 = L.BatchNormalization(planes)
         self.conv2 = L.Convolution2D(planes,
                                      planes,
                                      ksize=3,
                                      stride=1,
                                      pad=1,
                                      nobias=True)
         self.bn2 = L.BatchNormalization(planes)
         if inplanes != planes:
             self.downsample = Sequential(
                 L.Convolution2D(inplanes,
                                 planes,
                                 ksize=1,
                                 stride=stride,
                                 nobias=True), L.BatchNormalization(planes))
         else:
             self.downsample = lambda x: x
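The snippet shows only the constructor; a typical residual forward pass for such a block (an assumption, not necessarily the project's actual __call__, with F = chainer.functions and NumPy imported) would be:

import numpy as np

block = BasicBlock(inplanes=32, planes=64, stride=2)
x = np.zeros((1, 32, 16, 16), dtype=np.float32)
h = F.relu(block.bn1(block.conv1(x)))   # (1, 64, 8, 8)
h = block.bn2(block.conv2(h))           # (1, 64, 8, 8)
out = F.relu(h + block.downsample(x))   # the 1x1 conv matches the shortcut shape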
Example #11
 def __init__(self,
              n_inputs,
              n_outputs,
              ksize,
              stride,
              dilate,
              pad,
              dropout=0.2):
     super(TemporalBlock, self).__init__()
     with self.init_scope():
         self.conv1 = DilatedConvolution1D(n_inputs,
                                           n_outputs,
                                           ksize,
                                           stride=stride,
                                           pad=pad,
                                           dilate=dilate)
         self.chomp1 = Chomp1d(pad)
         self.relu1 = F.relu
         self.dropout1 = partial(F.dropout, ratio=dropout)
         self.conv2 = DilatedConvolution1D(n_outputs,
                                           n_outputs,
                                           ksize,
                                           stride=stride,
                                           pad=pad,
                                           dilate=dilate)
         self.chomp2 = Chomp1d(pad)
         self.relu2 = F.relu
         self.dropout2 = partial(F.dropout, ratio=dropout)
         self.net = Sequential(self.conv1, self.chomp1, self.relu1,
                               self.dropout1, self.conv2, self.chomp2,
                               self.relu2, self.dropout2)
         self.downsample = L.ConvolutionND(
             1, n_inputs, n_outputs, 1) if n_inputs != n_outputs else None
         self.relu = F.relu
Example #12
 def __init__(self, channels, blocks, ksize):
     super().__init__()
     with self.init_scope():
         self.frgb = Convolution2D(3, channels, ksize=1)
         self.resnet = Sequential(ResidualBlock(channels,
                                                ksize)).repeat(blocks)
         self.trgb = Convolution2D(channels, 3, ksize=1)
Example #13
    def _make_layer(self, block, planes, blocks, stride):
        strides = [stride] + [1] * (blocks - 1)
        layers = []
        for stride in strides:
            layers.append(block(self.inplane, planes, stride))
            self.inplane = planes

        return Sequential(*layers)
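Illustratively, for a block with the (inplanes, planes, stride) signature seen in Examples #10 and #19, only the first block in a stage downsamples, and updating self.inplane inside the loop makes every later block take planes input channels:

# Illustrative only: the strides pattern _make_layer builds for
# blocks=3, stride=2 is [2, 1, 1] -- downsample once, then keep size.
strides = [2] + [1] * (3 - 1)
assert strides == [2, 1, 1]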
Example #14
 def __init__(self, channels, blocks, ksize):
     super().__init__()
     with self.init_scope():
         self.resnet = Sequential(ResidualBlock(channels,
                                                ksize)).repeat(blocks)
         self.c = Convolution2D(channels + 3, channels, ksize=3)
         self.r = LeakyReluLink()
         self.up = Upsampler()
Example #15
    def __init__(self,
                 classes,
                 pretrained_resnet,
                 use_roialign=False,
                 mean_path="",
                 min_size=512,
                 max_size=512):
        super(FPN101, self).__init__()
        self.classes = classes
        self.use_roialign = use_roialign

        self.min_size = min_size
        self.max_size = max_size

        mean = np.load(mean_path)
        self.mean = np.resize(mean, (3, self.min_size, self.min_size))

        with self.init_scope():
            self.top_layer = L.Convolution2D(2048,
                                             256,
                                             ksize=1,
                                             stride=1,
                                             pad=0)  # Reduce channels
            # smooth layer
            self.smooth1 = L.Convolution2D(256, 256, ksize=3, stride=1, pad=1)
            self.smooth2 = L.Convolution2D(256, 256, ksize=3, stride=1, pad=1)
            self.smooth3 = L.Convolution2D(256, 256, ksize=3, stride=1, pad=1)
            # Lateral layers
            self.latlayer1 = L.Convolution2D(1024,
                                             256,
                                             ksize=1,
                                             stride=1,
                                             pad=0)
            self.latlayer2 = L.Convolution2D(512,
                                             256,
                                             ksize=1,
                                             stride=1,
                                             pad=0)
            self.latlayer3 = L.Convolution2D(256,
                                             256,
                                             ksize=1,
                                             stride=1,
                                             pad=0)
            self.roi_feat_downsample = L.Convolution2D(256,
                                                       256,
                                                       ksize=3,
                                                       stride=2,
                                                       pad=1)

            self.rcnn_top = Sequential(
                L.Convolution2D(256, 1024, ksize=7, stride=7, pad=0), F.relu,
                L.Convolution2D(1024, 1024, ksize=1, stride=1, pad=0), F.relu)
            self.cls_score = L.Linear(1024, self.classes)
            self.resnet101 = ResNet101Layers(
                pretrained_model=self._models["resnet101"]["path"])
            if pretrained_resnet.endswith(".npz"):
                print("loading :{} to FPN-101".format(pretrained_resnet))
                chainer.serializers.load_npz(pretrained_resnet, self)
Example #16
 def __init__(self, channels: int, stride):
     super(MixedOp, self).__init__()
     with self.init_scope():
         self._ops = chainer.ChainList()
         for primitive in PRIMITIVES:
             op = OPS[primitive](channels, stride, False)
             if 'pool' in primitive:
                 op = Sequential(op, links.BatchNormalization(channels, use_gamma=False, use_beta=False))
             self._ops.add_link(op)
Example #17
def convbn_3d(in_planes, out_planes, kernel_size, stride, pad):
    return Sequential(
        L.ConvolutionND(3, in_planes, out_planes,
                        ksize=kernel_size, stride=stride,
                        pad=pad, nobias=True,
                        initialW=ini.Normal(math.sqrt(2. / (kernel_size * kernel_size * kernel_size * out_planes)))),
        L.BatchNormalization(out_planes, eps=1e-5, decay=0.95,
                             initial_gamma=ini.One(), initial_beta=ini.Zero()),
    )
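A quick shape check with illustrative values, assuming math, numpy as np, chainer.links as L, and chainer.initializers as ini are imported as the function requires:

import numpy as np

block = convbn_3d(32, 64, kernel_size=3, stride=2, pad=1)
x = np.zeros((1, 32, 16, 64, 64), dtype=np.float32)   # (N, C, D, H, W)
y = block(x)   # (1, 64, 8, 32, 32): stride 2 halves each spatial dimension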
Example #18
def make_layers_reverse(kernel,
                        input_dim,
                        output_dim,
                        modules,
                        layer=conv_bn_relu):
    heads = layer(kernel, input_dim, input_dim).repeat(modules - 1)
    tail = layer(kernel, input_dim, output_dim)

    return Sequential(heads, tail)
Example #19
    def __init__(self, inplanes, planes, stride, downsample, pad, dilation):
        super(BasicBlock, self).__init__()
        with self.init_scope():
            self.conv1 = Sequential(convbn(inplanes, planes, 3, stride, pad, dilation),
                                    F.relu)

            self.conv2 = convbn(planes, planes, 3, 1, pad, dilation)

            self.downsample = downsample
            self.stride = stride
Example #20
    def __init__(self, channels: int, num_classes: int, layers: int, auxiliary, genotype):
        super(NetworkImageNet, self).__init__()
        self._layers = layers
        self._auxiliary = auxiliary

        self.stem0 = Sequential(
            links.Convolution2D(3, channels // 2, ksize=3, stride=2, pad=1, nobias=True),
            links.BatchNormalization(channels // 2),
            func.relu,
            links.Convolution2D(channels // 2, channels, ksize=3, stride=2, pad=1, nobias=True),
            links.BatchNormalization(channels),
        )

        self.stem1 = Sequential(
            func.relu,
            links.Convolution2D(channels, channels, 3, stride=2, pad=1, nobias=True),
            links.BatchNormalization(channels),
        )

        pp_ch, p_ch, curr_ch = channels, channels, channels

        self.cells = chainer.ChainList()
        reduction_prev = True
        ch_to_auxiliary = -1
        for i in range(layers):
            if i in [layers // 3, 2 * layers // 3]:
                curr_ch *= 2
                reduction = True
            else:
                reduction = False
            cell = Cell(genotype, pp_ch, p_ch, curr_ch, reduction, reduction_prev)
            reduction_prev = reduction
            self.cells.add_link(cell)
            pp_ch, p_ch = p_ch, cell.multiplier * curr_ch
            if i == 2 * layers // 3:
                ch_to_auxiliary = p_ch
        assert ch_to_auxiliary != -1
        if auxiliary:
            self.auxiliary_head = AuxiliaryHeadImageNet(ch_to_auxiliary, num_classes)
        self.classifier = links.Linear(p_ch, num_classes)
Example #21
class TwoLayerNet:
    def __init__(self, input_size, hidden_size, output_size):
        self.net = Sequential(L.Linear(input_size, hidden_size), F.relu,
                              L.Linear(hidden_size, output_size))

    def predict(self, x):
        return self.net(x)

    def loss(self, x, t):
        y = self.predict(x)
        return F.softmax_cross_entropy(y, t)

    def accuracy(self, x, t):
        y = self.predict(x)
        accuracy = F.accuracy(y, t)
        return accuracy

    def gradient(self, x, t):
        loss = self.loss(x, t)
        self.net.cleargrads()
        loss.backward()
        return loss.grad
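A minimal training-step sketch for this class (assumed usage, with chainer and numpy as np imported):

net = TwoLayerNet(input_size=784, hidden_size=100, output_size=10)
optimizer = chainer.optimizers.SGD(lr=0.01)
optimizer.setup(net.net)

x = np.random.rand(32, 784).astype(np.float32)
t = np.random.randint(0, 10, size=32).astype(np.int32)

loss = net.loss(x, t)    # forward pass
net.net.cleargrads()
loss.backward()          # backprop through the Sequential
optimizer.update()       # apply the SGD step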
Example #22
    def prepare(self, x, t):

        x = x.astype('float32')
        t = t.astype('float32')
        indices = np.arange(len(t))  # index list

        # Split the whole dataset into a train+validation set and a test set
        self.x_train_val, self.x_test, self.t_train_val, self.t_test, self.train_indices, self.test_indices \
            = train_test_split(x, t, indices, test_size=0.3)
        # Split further into training and validation sets
        self.x_train, self.x_val, self.t_train, self.t_val = train_test_split(
            self.x_train_val, self.t_train_val, test_size=0.3)

        # Define the network
        self.net = Sequential(L.Linear(self.n_input, self.n_hidden), F.relu,
                              L.Linear(self.n_hidden, self.n_hidden), F.relu,
                              L.Linear(self.n_hidden, self.n_output))

        # Define the optimization method
        self.optimizer = chainer.optimizers.SGD(lr=0.001)

        # Register the network parameters with the optimizer
        self.optimizer.setup(self.net)
Example #23
 def __init__(self, channels, num_classes):
     """assuming input size 14x14"""
     super(AuxiliaryHeadImageNet, self).__init__()
     self.features = Sequential(
         func.relu,
         func.AveragePooling2D(ksize=5, stride=2, pad=0, cover_all=False).apply,
         links.Convolution2D(channels, 128, 1, nobias=True),
         links.BatchNormalization(128),
         func.relu,
         links.Convolution2D(128, 768, 2, nobias=True),
         links.BatchNormalization(768),
         func.relu
     )
     self.classifier = links.Linear(768, num_classes)
Example #24
    def __init__(self, in_ch, out_ch, w_init=inits.HeNormal()):
        super(OCBlock, self).__init__()

        with self.init_scope():
            self.conv_pre = Sequential(
                L.Convolution2D(in_ch,
                                out_ch,
                                ksize=3,
                                pad=1,
                                nobias=True,
                                initialW=w_init),
                AutomaticBatchRenormalization(out_ch, decay=0.99))
            self.context = ObjectContextPooling(out_ch,
                                                out_ch // 2,
                                                out_ch,
                                                w_init=w_init)
            self.conv_post = Sequential(
                L.Convolution2D(2 * out_ch,
                                out_ch,
                                ksize=1,
                                pad=0,
                                nobias=True,
                                initialW=w_init),
                AutomaticBatchRenormalization(out_ch, decay=0.99))
Example #25
    def inference(self, input_data):
        loaded_net = Sequential(L.Linear(self.n_input, self.n_hidden), F.relu,
                                L.Linear(self.n_hidden, self.n_hidden), F.relu,
                                L.Linear(self.n_hidden, self.n_output))
        chainer.serializers.load_npz('sample.net', loaded_net)
        with chainer.using_config('train', False), chainer.using_config(
                'enable_backprop', False):
            y_test = loaded_net(input_data)

        #self.t_test.data.dtype = np.float32
        y_test = y_test[:].array
        y_test = y_test.T
        y_test = y_test[0]

        return y_test
Example #26
def inferrene_cifar10(test_data, test_label):
    # Run inference with the neural network
    loaded_net = Sequential(
        L.Linear(n_input, n_hidden), F.relu,
        L.Linear(n_hidden, n_hidden), F.relu,
        L.Linear(n_hidden, n_output)
    )

    chainer.serializers.load_npz('my_cifar10.net', loaded_net)

    with chainer.using_config('train', False), chainer.using_config('enable_backprop', False):
        results_test = loaded_net(test_data)

    print(np.argmax(results_test[0, :].array))

    return 0
Example #27
def conv_bn_relu(kernel_size,
                 input_dim,
                 output_dim,
                 stride=1,
                 with_bn=True) -> Sequential:
    pad = (kernel_size - 1) // 2
    bn = L.BatchNormalization(output_dim) if with_bn else F.identity
    return Sequential(
        L.Convolution2D(input_dim,
                        output_dim,
                        kernel_size,
                        stride,
                        pad,
                        nobias=(not with_bn)),
        bn,
        F.relu,
    )
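A quick usage sketch with illustrative shapes:

import numpy as np

block = conv_bn_relu(kernel_size=3, input_dim=3, output_dim=16)
x = np.zeros((1, 3, 32, 32), dtype=np.float32)
y = block(x)   # (1, 16, 32, 32): the computed pad preserves spatial size at stride 1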
Example #28
def define_upsampling(opt, input_ch, output_ch=None):
    if opt.upsampling_mode == 'bilinear':
        seq = Sequential(lambda x: F.resize_images(
            x, (x.shape[2] * 2, x.shape[3] * 2), mode='bilinear'))

        if output_ch is not None:
            seq.append(
                define_conv(opt)(input_ch,
                                 output_ch,
                                 ksize=3,
                                 stride=1,
                                 pad=1,
                                 initialW=HeNormal()))

        return seq

    if opt.upsampling_mode == 'nearest':
        seq = Sequential(lambda x: F.resize_images(
            x, (x.shape[2] * 2, x.shape[3] * 2), mode='nearest'))

        if output_ch is not None:
            seq.append(
                define_conv(opt)(input_ch,
                                 output_ch,
                                 ksize=3,
                                 stride=1,
                                 pad=1,
                                 initialW=HeNormal()))

        return seq

    if opt.upsampling_mode == 'deconv':
        return define_deconv(opt)(input_ch,
                                  input_ch if output_ch is None else output_ch,
                                  ksize=3,
                                  stride=1,
                                  pad=1,
                                  initialW=HeNormal())

    if opt.upsampling_mode == 'subpx_conv':
        return PixelShuffler(opt, input_ch,
                             input_ch if output_ch is None else output_ch)
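A sketch of the resize-only path; opt, define_conv, define_deconv, and PixelShuffler are project-specific, so here opt is a stand-in namespace carrying just the field this function reads:

from types import SimpleNamespace
import numpy as np

opt = SimpleNamespace(upsampling_mode='nearest')
up = define_upsampling(opt, input_ch=64)       # output_ch=None: resize only
x = np.zeros((1, 64, 8, 8), dtype=np.float32)
y = up(x)   # (1, 64, 16, 16)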
Example #29
    def __init__(self, in_ch, key_ch, out_ch, w_init=inits.HeNormal()):
        super(ObjectContextPooling, self).__init__()

        with self.init_scope():
            self.f_key = Sequential(
                L.Convolution2D(in_ch,
                                key_ch,
                                ksize=1,
                                pad=0,
                                nobias=True,
                                initialW=w_init),
                AutomaticBatchRenormalization(key_ch, decay=0.99))
            self.f_value = L.Convolution2D(in_ch,
                                           out_ch,
                                           ksize=1,
                                           pad=0,
                                           nobias=False,
                                           initialW=w_init)
Example #30
    def __init__(self, channels: int, num_classes: int, layers: int,
                 steps: int=4, multiplier: int=4, stem_multiplier: int=3,
                 criterion=func.softmax_cross_entropy):
        super(Network, self).__init__()

        # Arithmetic series: first term 2 (the two cell inputs), steps terms, common difference 1 (e.g. steps=4 gives k=14)
        k = int(steps * (2 * 2 + (steps - 1)) / 2)
        num_ops = len(PRIMITIVES)
        self._steps = steps
        self._multiplier = multiplier

        curr_ch = stem_multiplier * channels

        with self.init_scope():
            self.stem = Sequential(
                links.Convolution2D(in_channels=3, out_channels=curr_ch, ksize=3, pad=1, nobias=True),
                links.BatchNormalization(curr_ch)
            )

            pp_ch, p_ch, curr_ch = curr_ch, curr_ch, channels
            self.cells = chainer.ChainList()
            reduction_prev = False

            for i in range(layers):
                if i in [layers // 3, 2 * layers // 3]:
                    curr_ch *= 2
                    reduction = True
                else:
                    reduction = False
                cell = Cell(steps, multiplier, pp_ch, p_ch, curr_ch, reduction, reduction_prev)
                reduction_prev = reduction
                self.cells.add_link(cell)
                pp_ch, p_ch = p_ch, multiplier * curr_ch

            self.classifier = links.Linear(p_ch, num_classes)
        # Keep the alphas outside init_scope so that params() does not return them
        # Following the original implementation, though this seems to assume no resume
        self.alphas_normal = Attention(k, num_ops)
        self.alphas_reduce = Attention(k, num_ops)
        self._arch_parameters = [self.alphas_normal, self.alphas_reduce]
        self.normal_shape = self.alphas_normal.attention.shape
        self.reduce_shape = self.alphas_reduce.attention.shape

        self._criterion = criterion