Example #1
 def __init__(self, in_channels, num_codes):
     super().__init__()
     self.encoding_project = layers.ConvBNReLU(
         in_channels,
         in_channels,
         1,
     )
     self.encoding = nn.Sequential(
         Encoding(channels=in_channels, num_codes=num_codes),
         nn.BatchNorm1D(num_codes),
         nn.ReLU(),
     )
     self.fc = nn.Sequential(
         nn.Linear(in_channels, in_channels),
         nn.Sigmoid(),
     )
     self.in_channels = in_channels
Example #2
	def __init__(self, block, depth, num_classes=1000, with_pool=True):
		super(ResNet, self).__init__()
		layer_cfg = {
			18: [2, 2, 2, 2],
			34: [3, 4, 6, 3],
			50: [3, 4, 6, 3],
			101: [3, 4, 23, 3],
			152: [3, 8, 36, 3]
		}
		layers = layer_cfg[depth]
		self.num_classes = num_classes
		self.with_pool = with_pool
		self._norm_layer = nn.BatchNorm2D

		self.inplanes = 64
		self.dilation = 1

		self.conv1 = nn.Conv2D(
			3,
			self.inplanes,
			kernel_size=7,
			stride=2,
			padding=3,
			bias_attr=False)
		self.bn1 = self._norm_layer(self.inplanes)
		self.relu = nn.ReLU()
		self.maxpool = nn.MaxPool2D(kernel_size=3, stride=2, padding=1)
		self.layer1 = self._make_layer(block, 64, layers[0])
		self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
		self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
		self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

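		# Kaiming-normal initialization (std = sqrt(2 / fan_out)) for Conv2D weights;
		# BatchNorm2D scale set to ones and bias to zeros.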
		for m in self.children():
			if isinstance(m, nn.Conv2D):
				n = m._kernel_size[0] * m._kernel_size[1] * m._out_channels
				v = np.random.normal(loc=0., scale=np.sqrt(2. / n), size=m.weight.shape).astype('float32')
				m.weight.set_value(v)
			elif isinstance(m, nn.BatchNorm2D):
				m.weight.set_value(np.ones(m.weight.shape).astype('float32'))
				m.bias.set_value(np.zeros(m.bias.shape).astype('float32'))

		if with_pool:
			self.avgpool = nn.AdaptiveAvgPool2D((1, 1))

		if num_classes > 0:
			self.fc = nn.Linear(512 * block.expansion, num_classes)
Example #3
 def __init__(self, block, layers, use_se=True):
     self.inplanes = 64
     self.use_se = use_se
     super(ResNetFace, self).__init__()
     self.conv1 = nn.Conv2D(3, 64, kernel_size=3, padding=1)
     self.bn1 = nn.BatchNorm2D(64)
     self.prelu = nn.PReLU()
     self.maxpool = nn.MaxPool2D(kernel_size=2, stride=2)
     self.layer1 = self._make_layer(block, 64, layers[0])
     self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
     self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
     self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
     self.bn4 = nn.BatchNorm2D(512)
     self.dropout = nn.Dropout()
     self.flatten = nn.Flatten()
     self.fc5 = nn.Linear(512 * 7 * 7, 512)
     self.bn5 = nn.BatchNorm1D(512)
Example #4
 def __init__(self, input_embedding_size, hidden_size, hidden_act,
              num_hidden_layers, num_attention_heads,
              attention_probs_dropout_prob, hidden_dropout_prob,
              intermediate_size, layer_norm_eps):
     super(RemBertEncoder, self).__init__()
     self.embedding_hidden_mapping_in = nn.Linear(input_embedding_size,
                                                  hidden_size)
     self.layer = nn.LayerList([
         RemBertLayer(
             hidden_size=hidden_size,
             num_attention_heads=num_attention_heads,
             attention_probs_dropout_prob=attention_probs_dropout_prob,
             hidden_dropout_prob=hidden_dropout_prob,
             intermediate_size=intermediate_size,
             layer_norm_eps=layer_norm_eps,
             hidden_act=hidden_act) for _ in range(num_hidden_layers)
     ])
Example #5
 def __init__(self,
              embedding_size,
              hidden_size,
              vocab_size,
              activation,
              embedding_weights=None):
     super(RoFormerLMPredictionHead, self).__init__()
     self.transform = nn.Linear(hidden_size, embedding_size)
     self.activation = getattr(nn.functional, activation)
     self.layer_norm = nn.LayerNorm(embedding_size)
     self.decoder_weight = (self.create_parameter(
         shape=[vocab_size, embedding_size],
         dtype=self.transform.weight.dtype,
         is_bias=False,
     ) if embedding_weights is None else embedding_weights)
     self.decoder_bias = self.create_parameter(
         shape=[vocab_size], dtype=self.decoder_weight.dtype, is_bias=True)
Example #6
    def __init__(self, obs_dim, action_dim):
        super(Critic, self).__init__()

        # Q1 network
        self.l1 = nn.Linear(obs_dim + action_dim, 256)
        self.l2 = nn.Linear(256, 256)
        self.l3 = nn.Linear(256, 1)

        # Q2 network
        self.l4 = nn.Linear(obs_dim + action_dim, 256)
        self.l5 = nn.Linear(256, 256)
        self.l6 = nn.Linear(256, 1)
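A minimal forward-pass sketch (an assumption, not taken from the source) of how such a TD3-style twin-Q critic is typically evaluated: both heads see the concatenated observation and action.

import paddle
import paddle.nn.functional as F

def critic_forward(critic, obs, action):
    # Concatenate observation and action, then run the Q1 and Q2 heads.
    x = paddle.concat([obs, action], axis=1)
    q1 = critic.l3(F.relu(critic.l2(F.relu(critic.l1(x)))))
    q2 = critic.l6(F.relu(critic.l5(F.relu(critic.l4(x)))))
    return q1, q2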
Example #7
    def __init__(self, num_layers=50, seg_num=8, num_classes=400):
        super(TSM_ResNet, self).__init__()

        self.layers = num_layers
        self.seg_num = seg_num
        self.class_dim = num_classes

        if self.layers == 50:
            depth = [3, 4, 6, 3]
        else:
            raise NotImplementedError
        num_filters = [64, 128, 256, 512]

        self.conv = ConvBNLayer(num_channels=3,
                                num_filters=64,
                                filter_size=7,
                                stride=2,
                                act='relu')

        self.bottleneck_block_list = []
        num_channels = 64

        for block in range(len(depth)):
            shortcut = False
            for i in range(depth[block]):
                bottleneck_block = self.add_sublayer(
                    'bb_%d_%d' % (block, i),
                    BottleneckBlock(num_channels=num_channels,
                                    num_filters=num_filters[block],
                                    stride=2 if i == 0 and block != 0 else 1,
                                    shortcut=shortcut,
                                    seg_num=self.seg_num))
                num_channels = int(bottleneck_block._num_channels_out)
                self.bottleneck_block_list.append(bottleneck_block)
                shortcut = True

        stdv = 1.0 / math.sqrt(2048 * 1.0)

        self.out = nn.Linear(
            2048,
            self.class_dim,
            weight_attr=paddle.ParamAttr(
                initializer=nn.initializer.Uniform(-stdv, stdv)),
            bias_attr=paddle.ParamAttr(
                learning_rate=2.0, regularizer=paddle.regularizer.L2Decay(0.)))
Example #8
    def __init__(self, in_channels, num_fiducial):
        super(GridGenerator, self).__init__()
        self.eps = 1e-6
        self.F = num_fiducial

        name = "ex_fc"
        initializer = nn.initializer.Constant(value=0.0)
        param_attr = ParamAttr(learning_rate=0.0,
                               initializer=initializer,
                               name=name + "_w")
        bias_attr = ParamAttr(learning_rate=0.0,
                              initializer=initializer,
                              name=name + "_b")
        self.fc = nn.Linear(in_channels,
                            6,
                            weight_attr=param_attr,
                            bias_attr=bias_attr,
                            name=name)
Example #9
    def __init__(self,
                 input_size,
                 next_k=1,
                 num_channels=[64, 128, 256],
                 kernel_size=2,
                 dropout=0.2):
        super(TCNNetwork, self).__init__()

        self.last_num_channel = num_channels[-1]

        self.tcn = TCNEncoder(
            input_size=input_size,
            num_channels=num_channels,
            kernel_size=kernel_size,
            dropout=dropout)

        self.linear = nn.Linear(
            in_features=self.last_num_channel, out_features=next_k)
Example #10
 def __init__(self, block, layers, num_classes=1000):
     self.inplanes = 64
     super(ResNet, self).__init__()
     self.conv1 = nn.Conv2D(3,
                            64,
                            kernel_size=7,
                            stride=2,
                            padding=3,
                            bias_attr=False)
     self.bn1 = nn.BatchNorm(64)
     self.relu = nn.ReLU()
     self.maxpool = nn.Pool2D(pool_size=3, pool_stride=2, pool_padding=1)
     self.layer1 = self._make_layer(block, 64, layers[0])
     self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
     self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
     self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
     self.avgpool = nn.Pool2D(7, pool_stride=1, pool_type='avg')
     self.fc = nn.Linear(512 * block.expansion, num_classes)
Example #11
File: model.py Project: Yelrose/PGL
 def __init__(self,
              input_size,
              num_class,
              num_layers=1,
              hidden_size=64,
              dropout=0.5,
              **kwargs):
     super(GraphSage, self).__init__()
     self.num_class = num_class
     self.num_layers = num_layers
     self.hidden_size = hidden_size
     self.dropout = dropout
     self.convs = nn.LayerList()
     self.linear = nn.Linear(self.hidden_size, self.num_class)
     for i in range(self.num_layers):
         self.convs.append(
             pgl.nn.GraphSageConv(input_size if i == 0 else hidden_size,
                                  hidden_size))
Example #12
 def __init__(self, alpha, num_classes=100, dropout=0.2):
     super(MNASNet, self).__init__()
     assert alpha > 0.0
     self.alpha = alpha
     self.num_classes = num_classes
     depths = _get_depths(alpha)
     layers = [
         # First layer: regular conv.
         nn.Conv2D(3, depths[0], 3, padding=1, stride=1, bias_attr=False),
         nn.BatchNorm2D(depths[0], momentum=_BN_MOMENTUM),
         nn.ReLU(),
         # Depthwise separable, no skip.
         nn.Conv2D(depths[0],
                   depths[0],
                   3,
                   padding=1,
                   stride=1,
                   groups=depths[0],
                   bias_attr=False),
         nn.BatchNorm2D(depths[0], momentum=_BN_MOMENTUM),
         nn.ReLU(),
         nn.Conv2D(depths[0],
                   depths[1],
                   1,
                   padding=0,
                   stride=1,
                   bias_attr=False),
         nn.BatchNorm2D(depths[1], momentum=_BN_MOMENTUM),
         # MNASNet blocks: stacks of inverted residuals.
         _stack(depths[1], depths[2], 3, 2, 3, 3, _BN_MOMENTUM),
         _stack(depths[2], depths[3], 5, 1, 3, 3, _BN_MOMENTUM),
         _stack(depths[3], depths[4], 5, 2, 6, 3, _BN_MOMENTUM),
         _stack(depths[4], depths[5], 3, 1, 6, 2, _BN_MOMENTUM),
         _stack(depths[5], depths[6], 5, 2, 6, 4, _BN_MOMENTUM),
         _stack(depths[6], depths[7], 3, 1, 6, 1, _BN_MOMENTUM),
         # Final mapping to classifier input.
         nn.Conv2D(depths[7], 1280, 1, padding=0, stride=1,
                   bias_attr=False),
         nn.BatchNorm2D(1280, momentum=_BN_MOMENTUM),
         nn.ReLU(),
     ]
     self.layers = nn.Sequential(*layers)
     self.classifier = nn.Sequential(nn.Dropout(p=dropout),
                                     nn.Linear(1280, num_classes))
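The _get_depths and _stack helpers are not shown above. A hedged sketch of _get_depths, mirroring the torchvision MNASNet helper this port appears to follow (an assumption): scale the base channel widths by alpha and round to a multiple of 8.

def _round_to_multiple_of(val, divisor, round_up_bias=0.9):
    # Round val to a multiple of divisor, biased toward rounding up.
    new_val = max(divisor, int(val + divisor / 2) // divisor * divisor)
    return new_val if new_val >= round_up_bias * val else new_val + divisor

def _get_depths(alpha):
    # Scale the base MNASNet channel widths by alpha.
    return [_round_to_multiple_of(d * alpha, 8) for d in [32, 16, 24, 40, 80, 96, 192, 320]]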
Example #13
    def __init__(
        self,
        num_blocks,
        width_multiplier=None,
        override_groups_map=None,
        class_dim=1000,
        with_pool=True,
    ):
        super(RepVGG, self).__init__()
        assert len(width_multiplier) == 4
        self.class_dim = class_dim
        self.with_pool = with_pool
        self.override_groups_map = override_groups_map or dict()

        assert 0 not in self.override_groups_map

        self.in_planes = min(64, int(64 * width_multiplier[0]))

        self.stage0 = RepVGGBlock(
            in_channels=3,
            out_channels=self.in_planes,
            kernel_size=3,
            stride=2,
            padding=1,
        )
        self.cur_layer_idx = 1
        self.stage1 = self._make_stage(
            int(64 * width_multiplier[0]), num_blocks[0], stride=2
        )
        self.stage2 = self._make_stage(
            int(128 * width_multiplier[1]), num_blocks[1], stride=2
        )
        self.stage3 = self._make_stage(
            int(256 * width_multiplier[2]), num_blocks[2], stride=2
        )
        self.stage4 = self._make_stage(
            int(512 * width_multiplier[3]), num_blocks[3], stride=2
        )

        if with_pool:
            self.gap = nn.AdaptiveAvgPool2D(output_size=1)

        if class_dim > 0:
            self.linear = nn.Linear(int(512 * width_multiplier[3]), class_dim)
Example #14
    def __init__(self,
                 block,
                 depth=50,
                 width=64,
                 num_classes=1000,
                 with_pool=True,
                 groups=1):
        super(ResNet, self).__init__()
        layer_cfg = {
            18: [2, 2, 2, 2],
            34: [3, 4, 6, 3],
            50: [3, 4, 6, 3],
            101: [3, 4, 23, 3],
            152: [3, 8, 36, 3]
        }
        layers = layer_cfg[depth]
        self.groups = groups
        self.base_width = width
        self.num_classes = num_classes
        self.with_pool = with_pool
        self._norm_layer = nn.BatchNorm2D

        self.inplanes = 64
        self.dilation = 1

        self.conv1 = nn.Conv2D(
            3,
            self.inplanes,
            kernel_size=7,
            stride=2,
            padding=3,
            bias_attr=False)
        self.bn1 = self._norm_layer(self.inplanes)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2D(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        if with_pool:
            self.avgpool = nn.AdaptiveAvgPool2D((1, 1))

        if num_classes > 0:
            self.fc = nn.Linear(512 * block.expansion, num_classes)
Example #15
 def __init__(self, inp_dim, hidden_dim, num_layers, batch_norm=False, dropout=0.):
     super(MLP, self).__init__()
     layer_list = OrderedDict()
     in_dim = inp_dim
     for l in range(num_layers):
         layer_list['fc{}'.format(l)] = nn.Linear(in_dim, hidden_dim)
         if l < num_layers - 1:
             if batch_norm:
                 layer_list['norm{}'.format(l)] = nn.BatchNorm1D(num_features=hidden_dim)
             layer_list['relu{}'.format(l)] = nn.LeakyReLU()
             if dropout > 0:
                 layer_list['drop{}'.format(l)] = nn.Dropout(p=dropout)
         in_dim = hidden_dim
     if num_layers > 0:
         self.network = nn.Sequential()
         for i in layer_list:
             self.network.add_sublayer(i, layer_list[i])
     else:
         self.network = nn.Identity()
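A minimal usage sketch (assumptions: the class's forward simply applies self.network, and paddle, paddle.nn as nn, and collections.OrderedDict are imported as the snippet requires).

import paddle

mlp = MLP(inp_dim=16, hidden_dim=32, num_layers=3, batch_norm=True, dropout=0.1)
x = paddle.randn([8, 16])
out = mlp.network(x)  # shape [8, 32]: fc0 -> norm0 -> relu0 -> drop0 -> ... -> fc2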
Example #16
    def __init__(self, char_vocab_size, char_embed_dim, projection_dim,
                 num_highways, cnn_filters, max_characters_per_token):
        super(ELMoCharacterEncoderLayer, self).__init__()

        self._use_highway = (num_highways > 0)
        self._n_filters = sum(f[1] for f in cnn_filters)
        self._use_proj = (self._n_filters != projection_dim)

        paramAttr = paddle.ParamAttr(initializer=I.Uniform(low=-1.0, high=1.0))
        self._char_embedding_layer = nn.Embedding(
            num_embeddings=char_vocab_size,
            embedding_dim=char_embed_dim,
            weight_attr=paramAttr)
        self._char_embedding_layer.weight[0, :] = 0

        self._convolution_layers = []
        for i, (width, num) in enumerate(cnn_filters):
            paramAttr = paddle.ParamAttr(
                initializer=I.Uniform(low=-0.05, high=0.05))
            conv2d = nn.Conv2D(in_channels=char_embed_dim,
                               out_channels=num,
                               kernel_size=(1, width),
                               padding='Valid',
                               data_format='NHWC',
                               weight_attr=paramAttr)
            max_pool = nn.MaxPool2D(kernel_size=(1, max_characters_per_token -
                                                 width + 1),
                                    stride=(1, 1),
                                    padding='Valid',
                                    data_format='NHWC')
            self.add_sublayer('cnn_layer_{}'.format(i), conv2d)
            self.add_sublayer('maxpool_layer_{}'.format(i), max_pool)
            self._convolution_layers.append([width, conv2d, max_pool])

        self._relu = nn.ReLU()
        if self._use_highway:
            self._highway_layer = Highway(self._n_filters, num_highways)
        if self._use_proj:
            paramAttr = paddle.ParamAttr(initializer=I.Normal(
                mean=0.0, std=1.0 / np.sqrt(self._n_filters)))
            self._linear_layer = nn.Linear(self._n_filters,
                                           projection_dim,
                                           weight_attr=paramAttr)
Example #17
    def __init__(self, label_list: list = None, load_checkpoint: str = None):
        super(RepVGG_B1G4, self).__init__()

        if label_list is not None:
            self.labels = label_list
            class_dim = len(self.labels)
        else:
            label_list = []
            label_file = os.path.join(self.directory, 'label_list.txt')
            files = open(label_file)
            for line in files.readlines():
                line = line.strip('\n')
                label_list.append(line)
            self.labels = label_list
            class_dim = len(self.labels)

        num_blocks = [4, 6, 16, 1]
        width_multiplier = [2, 2, 2, 4]
        optional_groupwise_layers = [2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26]
        self.override_groups_map = {l: 4 for l in optional_groupwise_layers}

        assert 0 not in self.override_groups_map

        self.in_planes = min(64, int(64 * width_multiplier[0]))

        self.stage0 = RepVGGBlock(in_channels=3, out_channels=self.in_planes, kernel_size=3, stride=2, padding=1)
        self.cur_layer_idx = 1
        self.stage1 = self._make_stage(int(64 * width_multiplier[0]), num_blocks[0], stride=2)
        self.stage2 = self._make_stage(int(128 * width_multiplier[1]), num_blocks[1], stride=2)
        self.stage3 = self._make_stage(int(256 * width_multiplier[2]), num_blocks[2], stride=2)
        self.stage4 = self._make_stage(int(512 * width_multiplier[3]), num_blocks[3], stride=2)
        self.gap = nn.AdaptiveAvgPool2D(output_size=1)
        self.linear = nn.Linear(int(512 * width_multiplier[3]), class_dim)

        if load_checkpoint is not None:
            self.model_dict = paddle.load(load_checkpoint)
            self.set_dict(self.model_dict)
            print("load custom checkpoint success")
        else:
            checkpoint = os.path.join(self.directory, 'model.pdparams')
            self.model_dict = paddle.load(checkpoint)
            self.set_dict(self.model_dict)
            print("load pretrained checkpoint success")
Example #18
 def __init__(self,
              hidden_size,
              vocab_size,
              activation,
              embedding_weights=None):
     super(BertLMPredictionHead, self).__init__()
     self.weight_attr = paddle.ParamAttr(
         initializer=paddle.fluid.initializer.ConstantInitializer(value=0.000001))
     # self.transform = nn.Linear(hidden_size, hidden_size)
     self.transform = nn.Linear(hidden_size, hidden_size, weight_attr=self.weight_attr, bias_attr=False)
     self.activation = getattr(nn.functional, activation)
     self.layer_norm = nn.LayerNorm(hidden_size)
     self.decoder_weight = self.create_parameter(
         shape=[hidden_size, vocab_size],
         dtype=self.transform.weight.dtype,
         is_bias=False) if embedding_weights is None else embedding_weights
         #is_bias=True) if embedding_weights is None else embedding_weights
     self.decoder_bias = self.create_parameter(
         shape=[vocab_size], dtype=self.decoder_weight.dtype, is_bias=True)
Example #19
    def __init__(self, electra):
        super(ElectraGenerator, self).__init__()

        self.electra = electra
        self.generator_predictions = ElectraGeneratorPredictions(
            self.electra.config["embedding_size"],
            self.electra.config["hidden_size"],
            self.electra.config["hidden_act"])

        if not self.tie_word_embeddings:
            self.generator_lm_head = nn.Linear(
                self.electra.config["embedding_size"],
                self.electra.config["vocab_size"])
        else:
            self.generator_lm_head_bias = paddle.fluid.layers.create_parameter(
                shape=[self.electra.config["vocab_size"]],
                dtype=paddle.get_default_dtype(),
                is_bias=True)
        self.init_weights()
Example #20
 def model_init(self,
                vocab_size,
                embed_dim,
                hidden_size,
                bos_id=0,
                eos_id=1,
                beam_size=4,
                max_step_num=20):
     embedder = paddle.fluid.dygraph.Embedding(size=[vocab_size, embed_dim],
                                               dtype="float64")
     output_layer = nn.Linear(hidden_size, vocab_size)
     cell = nn.LSTMCell(embed_dim, hidden_size)
     self.max_step_num = max_step_num
     self.beam_search_decoder = BeamSearchDecoder(cell,
                                                  start_token=bos_id,
                                                  end_token=eos_id,
                                                  beam_size=beam_size,
                                                  embedding_fn=embedder,
                                                  output_fn=output_layer)
Example #21
    def __init__(self,
                 in_dim=256,
                 num_convs=4,
                 conv_dim=256,
                 mlp_dim=1024,
                 resolution=7,
                 norm_type='gn',
                 freeze_norm=False,
                 stage_name=''):
        super(XConvNormHead, self).__init__()
        self.in_dim = in_dim
        self.num_convs = num_convs
        self.conv_dim = conv_dim
        self.mlp_dim = mlp_dim
        self.norm_type = norm_type
        self.freeze_norm = freeze_norm

        self.bbox_head_convs = []
        fan = conv_dim * 3 * 3
        initializer = KaimingNormal(fan_in=fan)
        for i in range(self.num_convs):
            in_c = in_dim if i == 0 else conv_dim
            head_conv_name = stage_name + 'bbox_head_conv{}'.format(i)
            head_conv = self.add_sublayer(
                head_conv_name,
                ConvNormLayer(ch_in=in_c,
                              ch_out=conv_dim,
                              filter_size=3,
                              stride=1,
                              norm_type=self.norm_type,
                              norm_name=head_conv_name + '_norm',
                              freeze_norm=self.freeze_norm,
                              initializer=initializer,
                              name=head_conv_name))
            self.bbox_head_convs.append(head_conv)

        fan = conv_dim * resolution * resolution
        self.fc6 = nn.Linear(conv_dim * resolution * resolution,
                             mlp_dim,
                             weight_attr=paddle.ParamAttr(
                                 initializer=XavierUniform(fan_out=fan)),
                             bias_attr=paddle.ParamAttr(
                                 learning_rate=2., regularizer=L2Decay(0.)))
Example #22
 def __init__(self,
              emb_size,
              hidden_size,
              word_num,
              label_num,
              use_w2v_emb=False):
     super(BiGRUWithCRF, self).__init__()
     if use_w2v_emb:
         self.word_emb = TokenEmbedding(
             extended_vocab_path='./conf/word.dic', unknown_token='OOV')
     else:
         self.word_emb = nn.Embedding(word_num, emb_size)
     self.gru = nn.GRU(emb_size,
                       hidden_size,
                       num_layers=2,
                       direction='bidirectional')
     self.fc = nn.Linear(hidden_size * 2, label_num + 2)  # BOS EOS
     self.crf = LinearChainCrf(label_num)
     self.decoder = ViterbiDecoder(self.crf.transitions)
Example #23
    def __init__(self,
                 num_classes=80,
                 hidden_dim=512,
                 nhead=8,
                 num_mlp_layers=3,
                 loss='DETRLoss'):
        super(DeformableDETRHead, self).__init__()
        self.num_classes = num_classes
        self.hidden_dim = hidden_dim
        self.nhead = nhead
        self.loss = loss

        self.score_head = nn.Linear(hidden_dim, self.num_classes)
        self.bbox_head = MLP(hidden_dim,
                             hidden_dim,
                             output_dim=4,
                             num_layers=num_mlp_layers)

        self._reset_parameters()
Example #24
    def __init__(self, in_features, layer_num=2, low_rank=32, num_experts=4):
        super(CrossNetMix, self).__init__()
        self.layer_num = layer_num
        self.num_experts = num_experts

        # U: (in_features, low_rank)
        self.U_list = paddle.nn.ParameterList([
            paddle.create_parameter(
                shape=[num_experts, in_features, low_rank],
                dtype='float32',
                default_initializer=paddle.nn.initializer.XavierNormal())
            for i in range(self.layer_num)
        ])

        # V: (in_features, low_rank)
        self.V_list = paddle.nn.ParameterList([
            paddle.create_parameter(
                shape=[num_experts, in_features, low_rank],
                dtype='float32',
                default_initializer=paddle.nn.initializer.XavierNormal())
            for i in range(self.layer_num)
        ])

        # C: (low_rank, low_rank)
        self.C_list = paddle.nn.ParameterList([
            paddle.create_parameter(
                shape=[num_experts, low_rank, low_rank],
                dtype='float32',
                default_initializer=paddle.nn.initializer.XavierNormal())
            for i in range(self.layer_num)
        ])

        self.gating = nn.LayerList(
            [nn.Linear(in_features, 1) for i in range(self.num_experts)])

        self.bias = paddle.nn.ParameterList([
            paddle.create_parameter(
                shape=[in_features, 1],
                dtype='float32',
                default_initializer=paddle.nn.initializer.Constant(value=0.0))
            for i in range(self.layer_num)
        ])
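A per-expert sketch of how these parameters are usually combined in a DCN-Mix cross step, x_{l+1} = x_0 * (U tanh(C tanh(V^T x_l)) + b) + x_l; this is inferred from the DCN-V2 formulation rather than taken from the source, and operates on one expert slice of each [num_experts, ...] parameter.

import paddle

def cross_expert_step(x0, xl, U, V, C, b):
    # x0, xl: [batch, in_features, 1]; U, V: [in_features, low_rank];
    # C: [low_rank, low_rank]; b: [in_features, 1]
    v_x = paddle.tanh(paddle.matmul(V, xl, transpose_x=True))  # [batch, low_rank, 1]
    v_x = paddle.tanh(paddle.matmul(C, v_x))                   # [batch, low_rank, 1]
    uv_x = paddle.matmul(U, v_x) + b                           # [batch, in_features, 1]
    return x0 * uv_x + xl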
Example #25
 def __init__(
     self,
     vocab_size,
     embedding_size=768,
     hidden_size=768,
     num_hidden_layers=12,
     num_attention_heads=12,
     intermediate_size=3072,
     hidden_act="gelu",
     hidden_dropout_prob=0.1,
     attention_probs_dropout_prob=0.1,
     max_position_embeddings=1536,
     type_vocab_size=2,
     initializer_range=0.02,
     pad_token_id=0,
     pool_act="tanh",
     rotary_value=False,
 ):
     super(RoFormerModel, self).__init__()
     self.pad_token_id = pad_token_id
     self.initializer_range = initializer_range
     if embedding_size != hidden_size:
         self.embeddings_project = nn.Linear(embedding_size, hidden_size)
     self.embeddings = RoFormerEmbeddings(
         vocab_size,
         embedding_size,
         hidden_dropout_prob,
         type_vocab_size,
     )
     encoder_layer = TransformerEncoderLayerWithRotary(
         hidden_size,
         num_attention_heads,
         intermediate_size,
         dropout=hidden_dropout_prob,
         activation=hidden_act,
         attn_dropout=attention_probs_dropout_prob,
         act_dropout=0,
         rotary_value=rotary_value,
     )
     self.encoder = nn.TransformerEncoder(encoder_layer, num_hidden_layers)
     self.pooler = RoFormerPooler(hidden_size, pool_act)
     self.apply(self.init_weights)
Example #26
def Linear(input_size, hidden_size, with_bias=True):
    fan_in = input_size
    bias_bound = 1.0 / math.sqrt(fan_in)
    fc_bias_attr = paddle.ParamAttr(
        initializer=nn.initializer.Uniform(low=-bias_bound, high=bias_bound))

    negative_slope = math.sqrt(5)
    gain = math.sqrt(2.0 / (1 + negative_slope**2))
    std = gain / math.sqrt(fan_in)
    weight_bound = math.sqrt(3.0) * std
    fc_w_attr = paddle.ParamAttr(initializer=nn.initializer.Uniform(
        low=-weight_bound, high=weight_bound))

    if not with_bias:
        fc_bias_attr = False

    return nn.Linear(input_size,
                     hidden_size,
                     weight_attr=fc_w_attr,
                     bias_attr=fc_bias_attr)
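A short usage sketch (assuming math, paddle, and paddle.nn as nn are imported as the factory above requires); the factory reproduces PyTorch-style Kaiming-uniform defaults for a Paddle nn.Linear.

fc = Linear(128, 64)        # weight ~ U(-bound, bound), bound = sqrt(3) * gain / sqrt(fan_in)
x = paddle.randn([4, 128])
y = fc(x)                   # shape [4, 64]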
Example #27
    def __init__(self, img_size=256, style_dim=64, num_domains=2, max_conv_dim=512):
        super().__init__()
        dim_in = 2**14 // img_size
        blocks = []
        blocks += [nn.Conv2D(3, dim_in, 3, 1, 1)]

        repeat_num = int(np.log2(img_size)) - 2
        for _ in range(repeat_num):
            dim_out = min(dim_in*2, max_conv_dim)
            blocks += [ResBlk(dim_in, dim_out, downsample=True)]
            dim_in = dim_out

        blocks += [nn.LeakyReLU(0.2)]
        blocks += [nn.Conv2D(dim_out, dim_out, 4, 1, 0)]
        blocks += [nn.LeakyReLU(0.2)]
        self.shared = nn.Sequential(*blocks)

        self.unshared = nn.LayerList()
        for _ in range(num_domains):
            self.unshared.append(nn.Linear(dim_out, style_dim))
Example #28
    def __init__(self, config_name, pretrained=True):
        super().__init__()

        assert config_name in CONFIG_NAMES, f'input config {config_name} incorrect, available configs: {CONFIG_NAMES}'

        config_url = URL_BASE + f'config/{config_name}.json'
        config_path = download.get_weights_path_from_url(config_url)
        config = PretrainedConfig.from_pretrained(config_path)

        self.wav2vec2 = Wav2Vec2Model(config)
        self.dropout = nn.Dropout(config.final_dropout)
        self.lm_head = nn.Linear(config.hidden_size, config.vocab_size)

        self.config = config
        if pretrained:
            weight_url = URL_BASE + f'weights/{config_name}.pdparam'
            weight_path = download.get_weights_path_from_url(weight_url)
            state_dict = paddle.load(weight_path)
            self.load_dict(state_dict)
            self.eval()
Example #29
    def __init__(self, bond_angle_float_names, embed_dim, rbf_params=None):
        super(BondAngleFloatRBF, self).__init__()
        self.bond_angle_float_names = bond_angle_float_names

        if rbf_params is None:
            self.rbf_params = {
                'bond_angle': (np.arange(0, np.pi,
                                         0.1), 10.0),  # (centers, gamma)
            }
        else:
            self.rbf_params = rbf_params

        self.linear_list = nn.LayerList()
        self.rbf_list = nn.LayerList()
        for name in self.bond_angle_float_names:
            centers, gamma = self.rbf_params[name]
            rbf = RBF(centers, gamma)
            self.rbf_list.append(rbf)
            linear = nn.Linear(len(centers), embed_dim)
            self.linear_list.append(linear)
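A hedged sketch of the radial-basis expansion that the RBF(centers, gamma) sublayer above typically performs before the per-feature nn.Linear (an assumption about RBF, not code from the source): rbf(x)_k = exp(-gamma * (x - center_k)^2).

import paddle

def rbf_expand(x, centers, gamma):
    # x: [batch, 1] scalar bond-angle feature; centers: 1-D array of RBF centers.
    centers = paddle.to_tensor(centers, dtype='float32')
    return paddle.exp(-gamma * paddle.square(x - centers))  # [batch, num_centers]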
Example #30
    def __init__(self,
                 img_size=384,
                 patch_size=16,
                 class_dim=1000,
                 embed_dim=768,
                 depth=12,
                 num_heads=12,
                 mlp_ratio=4,
                 qkv_bias=False,
                 norm_layer='nn.LayerNorm',
                 epsilon=1e-5,
                 **kwargs):
        # ViT structure
        super().__init__(img_size=img_size,
                         patch_size=patch_size,
                         class_dim=class_dim,
                         embed_dim=embed_dim,
                         depth=depth,
                         num_heads=num_heads,
                         mlp_ratio=mlp_ratio,
                         qkv_bias=qkv_bias,
                         norm_layer=norm_layer,
                         epsilon=epsilon,
                         **kwargs)
        # Since a distillation token is added, the position embedding length also has to be adjusted
        self.pos_embed = self.create_parameter(
            shape=(1, self.patch_embed.num_patches + 2, self.embed_dim),
            default_initializer=zeros_)
        self.add_parameter("pos_embed", self.pos_embed)
        # distillation token
        self.dist_token = self.create_parameter(shape=(1, 1, self.embed_dim),
                                                default_initializer=zeros_)
        self.add_parameter("dist_token", self.dist_token)
        # Classifier head
        self.head_dist = nn.Linear(
            self.embed_dim,
            self.class_dim) if self.class_dim > 0 else Identity()

        trunc_normal_(self.dist_token)
        trunc_normal_(self.pos_embed)
        self.head_dist.apply(self._init_weights)