Example #1
File: model.py  Project: ruizewang/CMKP
    def __init__(self, vocab_size, embed_size, hidden_size, memory_bank_size,
                 copy_attn, pad_idx, dropout, cur_model):
        super(RNNDecoder, self).__init__()
        self.embed_size = embed_size
        self.hidden_size = hidden_size
        self.vocab_size = vocab_size
        self.memory_bank_size = memory_bank_size
        self.dropout = nn.Dropout(dropout)
        self.copy_attn = copy_attn
        self.pad_token = pad_idx

        self.cur_model = cur_model
        self.use_img = 'img' in self.cur_model
        self.use_attr = 'attr' in self.cur_model

        self.embedding = nn.Embedding(self.vocab_size, self.embed_size,
                                      self.pad_token)

        self.input_size = embed_size

        self.rnn = nn.GRU(input_size=self.input_size,
                          hidden_size=hidden_size,
                          num_layers=1,
                          bidirectional=False,
                          batch_first=False)

        self.attention_layer = Attention(decoder_size=hidden_size,
                                         memory_bank_size=memory_bank_size,
                                         need_mask=True)

        self.combine_pred = 'combine' in cur_model
        self.combine_pred_type = 'direct' if 'direct' in cur_model else 'embed'
        if self.combine_pred:
            if self.combine_pred_type == 'embed':
                self.pred_att = Attention(decoder_size=hidden_size,
                                          memory_bank_size=memory_bank_size,
                                          need_mask=True)
            self.cls_pred_p_gen_linear = nn.Linear(
                embed_size + hidden_size + memory_bank_size, 1)

        if copy_attn:
            self.p_gen_linear = nn.Linear(
                embed_size + hidden_size + memory_bank_size, 1)

        self.sigmoid = nn.Sigmoid()

        self.vocab_dist_linear_1 = nn.Linear(hidden_size + memory_bank_size,
                                             hidden_size)
        self.vocab_dist_linear_2 = nn.Linear(hidden_size, vocab_size)
        self.softmax = MaskedSoftmax(dim=1)
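A minimal instantiation sketch (not taken from the source project); every size and the cur_model string below are placeholder values chosen only to exercise the constructor shown above.

# Hypothetical usage of the RNNDecoder above; all values are placeholders.
decoder = RNNDecoder(vocab_size=50000,
                     embed_size=200,
                     hidden_size=300,
                     memory_bank_size=600,
                     copy_attn=True,
                     pad_idx=0,
                     dropout=0.1,
                     cur_model='text_img')  # no 'combine' in the name -> combine_pred stays off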
Example #2
 def __init__(self, layer_size=8, input_channels=3, att=False):
     super().__init__()
     self.layer_size = layer_size
     self.enc_1 = VSRLayer(3, 64, kernel_size=7)
     self.enc_2 = VSRLayer(64, 128, kernel_size=5)
     self.enc_3 = PConvLayer(128, 256, sample='down-5')
     self.enc_4 = PConvLayer(256, 512, sample='down-3')
     for i in range(4, self.layer_size):
         name = 'enc_{:d}'.format(i + 1)
         setattr(self, name, PConvLayer(512, 512, sample='down-3'))
     self.deconv = nn.ConvTranspose2d(512, 512, 4, 2, 1)
     for i in range(4, self.layer_size):
         name = 'dec_{:d}'.format(i + 1)
         setattr(self, name,
                 PConvLayer(512 + 512, 512, activ='leaky', deconv=True))
     self.dec_4 = PConvLayer(512 + 256, 256, activ='leaky', deconv=True)
     if att:
         self.att = Attention.AttentionModule()
     else:
         self.att = lambda x: x
     self.dec_3 = PConvLayer(256 + 128, 128, activ='leaky', deconv=True)
     self.dec_2 = VSRLayer(128 + 64,
                           64,
                           stride=1,
                           activation='leaky',
                           deconv=True)
     self.dec_1 = VSRLayer(64 + input_channels,
                           64,
                           stride=1,
                           activation=None,
                           batch_norm=False)
     self.resolver = Bottleneck(64, 16)
     self.output = nn.Conv2d(128, 3, 1)
Example #3
    def __init__(self, vocab, config, pretrained_embedding):
        super(HANEncoder, self).__init__()
        self.config = config
        self.word_embed = nn.Embedding(vocab.vocab_size,
                                       config.word_dims,
                                       padding_idx=0)
        self.extword_embed = nn.Embedding(vocab.extvocab_size,
                                          config.word_dims,
                                          padding_idx=0)

        word_init = np.zeros((vocab.vocab_size, config.word_dims),
                             dtype=np.float32)
        self.word_embed.weight.data.copy_(torch.from_numpy(word_init))

        self.extword_embed.weight.data.copy_(
            torch.from_numpy(pretrained_embedding))
        self.extword_embed.weight.requires_grad = False

        self.role_embed = nn.Embedding(vocab.role_size,
                                       config.role_dims,
                                       padding_idx=0)
        role_init = np.zeros((vocab.role_size, config.role_dims),
                             dtype=np.float32)
        self.role_embed.weight.data.copy_(torch.from_numpy(role_init))

        self.sent_lstm = MyLSTM(
            input_size=config.word_dims,
            hidden_size=config.lstm_hiddens,
            num_layers=config.lstm_layers,
            batch_first=True,
            bidirectional=True,
            dropout_in=config.dropout_lstm_input,
            dropout_out=config.dropout_lstm_hidden,
        )

        self.turn_lstm = MyLSTM(
            input_size=config.lstm_hiddens * 2,
            hidden_size=config.lstm_hiddens,
            num_layers=config.lstm_layers,
            batch_first=True,
            bidirectional=True,
            dropout_in=config.dropout_lstm_input,
            dropout_out=config.dropout_lstm_hidden,
        )

        self.sent_att = Attention(config.lstm_hiddens * 2)
        self.turn_att = Attention(config.lstm_hiddens * 2)
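For reference, a hedged sketch of the arguments HANEncoder expects, using SimpleNamespace stand-ins for the project's real vocab and config objects; every number is a placeholder.

from types import SimpleNamespace
import numpy as np

vocab = SimpleNamespace(vocab_size=30000, extvocab_size=40000, role_size=10)
config = SimpleNamespace(word_dims=100, role_dims=20,
                         lstm_hiddens=200, lstm_layers=2,
                         dropout_lstm_input=0.33, dropout_lstm_hidden=0.33)
# The pretrained table must match (extvocab_size, word_dims) for the copy_ above.
pretrained = np.random.rand(vocab.extvocab_size, config.word_dims).astype(np.float32)

encoder = HANEncoder(vocab, config, pretrained)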
Example #4
    def __init__(self, embedding_size, hidden_size, seq_len, n_glimpses,
                 tanh_exploration):
        super(RNNTSP, self).__init__()

        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.n_glimpses = n_glimpses
        self.seq_len = seq_len

        self.embedding = GraphEmbedding(2, embedding_size)
        self.encoder = nn.LSTM(embedding_size, hidden_size, batch_first=True)
        self.decoder = nn.LSTM(embedding_size, hidden_size, batch_first=True)
        self.pointer = Attention(hidden_size, C=tanh_exploration)
        self.glimpse = Attention(hidden_size)

        self.decoder_start_input = nn.Parameter(
            torch.FloatTensor(embedding_size))
        self.decoder_start_input.data.uniform_(
            -(1. / math.sqrt(embedding_size)), 1. / math.sqrt(embedding_size))
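A hypothetical instantiation of the RNNTSP pointer network above; the sizes follow common pointer-network settings but are not taken from the source.

model = RNNTSP(embedding_size=128,
               hidden_size=128,
               seq_len=20,
               n_glimpses=1,
               tanh_exploration=10)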
Example #5
 def __init__(self, class_num, word_embedding_matrix, position1_embedding_matrix,
              position2_embedding_matrix, filters, kernel_size, padding=0,
              activation=nn.Tanh(), dropout=0.5, nis_hidden_dims=[]):
     super(APCNN_NIS, self).__init__()
     self.filters = filters
     word_dim = word_embedding_matrix.shape[1]
     self.embedding = Embedding(word_embedding_matrix, position1_embedding_matrix, position2_embedding_matrix)
     self.pcnn = PCNN(1, filters, kernel_size, padding=padding, activation=activation, dropout=dropout)
     self.attention = Attention(3 * filters + word_dim, activation=activation)
     self.attention_weight = None
     self.nis = NIS(3 * filters, nis_hidden_dims)
     self.linear = nn.Linear(3 * filters, class_num, bias=True)
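A construction sketch with made-up embedding tables and sizes; it assumes the project's custom Embedding wrapper accepts the matrices in this raw NumPy form.

import numpy as np

word_emb = np.random.rand(20000, 50).astype(np.float32)   # (vocab_size, word_dim)
pos1_emb = np.random.rand(123, 5).astype(np.float32)      # relative-position tables
pos2_emb = np.random.rand(123, 5).astype(np.float32)

model = APCNN_NIS(class_num=53,
                  word_embedding_matrix=word_emb,
                  position1_embedding_matrix=pos1_emb,
                  position2_embedding_matrix=pos2_emb,
                  filters=230,
                  kernel_size=3)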
Example #6
    def __init__(self, config, device):
        super(CRNNet, self).__init__()
        self.config = config
        self.stages = {
            'Trans': config['transform'],
            'Feat': config['backbone'],
            'Seq': config['sequence'],
            'Pred': config['prediction']
        }

        if config['transform'] == 'TPS':
            self.Transformation = TPS_SpatialTransformerNetwork(
                F=config['num_fiducial'],
                im_size=(config['height'], config['width']),
                im_rectified=(config['height'], config['width']),
                num_channels=config['input_channel'],
                device=device)
        else:
            print('No tps specified')
        if config['backbone'] == 'ResNet':
            self.FeatureExtraction = ResNet_FeatureExtractor(
                config['input_channel'], config['output_channel'])
        else:
            raise Exception('No backbone module specified')
        self.FeatureExtraction_output = config[
            'output_channel']  # int(imgH/16-1) * 512
        self.AdaptiveAvgPool = nn.AdaptiveAvgPool2d(
            (None, 1))  # Transform final (imgH/16-1) -> 1

        if config['sequence'] == 'biLSTM':
            self.SequenceModeling = nn.Sequential(
                BidirectionalLSTM(self.FeatureExtraction_output,
                                  config['hidden_size'],
                                  config['hidden_size']),
                BidirectionalLSTM(config['hidden_size'], config['hidden_size'],
                                  config['hidden_size']))
            self.SequenceModeling_output = config['hidden_size']
        else:
            print('No sequence module specified')
            self.SequenceModeling_output = self.FeatureExtraction_output

        if config['prediction'] == 'CTC':
            self.Prediction = nn.Linear(self.SequenceModeling_output,
                                        config['num_classes'])
        elif config['prediction'] == 'Attention':
            self.Prediction = Attention(self.SequenceModeling_output,
                                        config['hidden_size'],
                                        config['num_classes'],
                                        device=device)
        else:
            raise Exception(
                'prediction needs to be either CTC or attention-based sequence prediction'
            )
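An illustrative config dict covering the keys CRNNet reads above; the concrete values (image size, class count, etc.) are placeholders, not the project's defaults.

import torch

config = {
    'transform': 'TPS', 'backbone': 'ResNet',
    'sequence': 'biLSTM', 'prediction': 'CTC',
    'num_fiducial': 20, 'height': 32, 'width': 100,
    'input_channel': 1, 'output_channel': 512,
    'hidden_size': 256, 'num_classes': 37,
}
model = CRNNet(config, device=torch.device('cpu'))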
Example #7
    def __init__(
        self,
        dim,
        depth,
        heads,
        mlp_ratio=4.0,
        attn_dropout=0.0,
        dropout=0.0,
        qkv_bias=True,
        revised=False,
    ):
        super().__init__()
        self.layers = nn.ModuleList([])

        assert isinstance(
            mlp_ratio, float
        ), "MLP ratio should be a float"
        mlp_dim = int(mlp_ratio * dim)

        for _ in range(depth):
            self.layers.append(
                nn.ModuleList(
                    [
                        PreNorm(
                            dim,
                            Attention(
                                dim,
                                num_heads=heads,
                                qkv_bias=qkv_bias,
                                attn_drop=attn_dropout,
                                proj_drop=dropout,
                            ),
                        ),
                        PreNorm(
                            dim,
                            FeedForward(dim, mlp_dim, dropout_rate=dropout,),
                        )
                        if not revised
                        else FeedForward(
                            dim, mlp_dim, dropout_rate=dropout, revised=True,
                        ),
                    ]
                )
            )
Example #8
 def __init__(self,
              input_size,
              hidden_size,
              text_embed_size,
              reduction_factor=2):
     super(Decoder_Mel, self).__init__()
     self.text_embed_size = text_embed_size
     self.prenet = Prenet(input_size=input_size,
                          hidden_size=hps.prenet_size[0],
                          output_size=hidden_size // 2,
                          dropout_rate=hps.prenet_dropout_rate)
     self.attnRNN = AttentionRNN(input_size=hidden_size // 2,
                                 hidden_size=hidden_size,
                                 text_embed_size=text_embed_size)
     self.attn = Attention(query_size=hidden_size,
                           context_size=text_embed_size)
     self.decRNN = DecoderRNN(input_size=hidden_size + text_embed_size,
                              output_size=hps.n_mels,
                              r=reduction_factor)
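A hypothetical call; it assumes the module-level hps hyper-parameter object referenced above (prenet_size, prenet_dropout_rate, n_mels) is already defined, and the sizes are placeholders.

decoder = Decoder_Mel(input_size=80, hidden_size=256, text_embed_size=256)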
Example #9
    def __init__(self,
                 rnn_hidden_dims,
                 attn_hidden_dims,
                 hops,
                 word_embedding):
        super(SelfAttentionNetwork, self).__init__()

        self._rnn_hidden_dims = rnn_hidden_dims
        self._attn_hidden_dims = attn_hidden_dims
        self._device = None

        self.embedding_dims = self._rnn_hidden_dims * 2
        self.hops = hops

        self._embedding = word_embedding
        self._embedding_dims = self._embedding._embedding_dim

        self._birnn = BiRNN(self._embedding_dims, self._rnn_hidden_dims)
        self._attention = Attention(self._rnn_hidden_dims * 2, self._attn_hidden_dims, self.hops)
Example #10
    def __init__(self, vocab_size, encoder_dim):
        super(Decoder, self).__init__()
        self.hidden_size = HIDDEN_SIZE
        self.encoder_dim = encoder_dim
        self.num_pixels = FEATURE_MAP_DIM * FEATURE_MAP_DIM
        self.embedding_size = EMBEDDING_SIZE
        self.attention_dim = ATTENTION_DIM
        self.vocab_size = vocab_size
        self.num_layers = NUM_LAYERS_DECODER

        self.embedding = nn.Embedding(vocab_size, self.embedding_size, padding_idx=3).to(DEVICE)
        self.attention = Attention(encoder_dim=self.encoder_dim,
                                   decoder_dim=self.hidden_size,
                                   attention_dim=self.attention_dim)

        self.f_beta = nn.Linear(self.hidden_size, self.encoder_dim)  # linear layer to create a sigmoid-activated gate
        self.sigmoid = nn.Sigmoid()
        self.input_drop = VariationalDropout(INPUT_DROPOUT, batch_first=True)
        self.output_drop = VariationalDropout(OUTPUT_DROPOUT, batch_first=True)

        # TODO: change to LSTMCell! (Multiple Layers?)

        self.decode_step = nn.LSTMCell(self.embedding_size + self.encoder_dim,
                                       self.hidden_size, bias=True)
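A hypothetical call; it assumes the module-level constants used above (HIDDEN_SIZE, EMBEDDING_SIZE, ATTENTION_DIM, FEATURE_MAP_DIM, NUM_LAYERS_DECODER, DEVICE, INPUT_DROPOUT, OUTPUT_DROPOUT) are already defined, and the numbers are placeholders.

decoder = Decoder(vocab_size=10000, encoder_dim=2048)  # 2048 would match a ResNet-style encoder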
Example #11
File: model.py  Project: ruizewang/CMKP
    def __init__(self, opt):
        """Initialize model."""
        super(MultimodalEncoder, self).__init__()
        self.data_path = opt.data_path
        self.emb_path = opt.emb_path
        self.bidirectional = opt.bidirectional
        self.num_directions = 2 if self.bidirectional else 1
        self.hidden_size = opt.hidden_size
        self.bi_hidden_size = self.num_directions * opt.hidden_size
        opt.bi_hidden_size = self.bi_hidden_size

        self.cur_model = opt.cur_model
        self.use_text = opt.use_text
        assert self.use_text
        self.use_img = opt.use_img
        self.use_attr = opt.use_attr

        self.img_ext_model = opt.img_ext_model

        self.text_pooling_type = 'avg' if 'avg_text' in opt.cur_model else 'max'  # default is max
        self.img_pooling_type = 'max' if 'max_img' in opt.cur_model else 'avg'  # default is avg
        self.attr_pooling_type = 'avg' if 'avg_attr' in opt.cur_model else 'max'  # default is max

        self.embedding = nn.Embedding(opt.vocab_size, opt.emb_size,
                                      opt.pad_idx)
        self.init_weights(opt.emb_type, opt.pad_idx)

        self.rnn = nn.GRU(input_size=opt.emb_size,
                          hidden_size=opt.hidden_size,
                          num_layers=opt.num_layers,
                          bidirectional=opt.bidirectional,
                          batch_first=True,
                          dropout=opt.dropout)

        if 'text_self_att' in self.cur_model:
            self.attention = Attention(self.bi_hidden_size,
                                       self.bi_hidden_size,
                                       need_mask=True)

        if 'text_self_co_att' in self.cur_model:
            self.text_self_co_att = CoAttention(self.bi_hidden_size,
                                                self.bi_hidden_size,
                                                input_type='text_text')

        if self.use_img:
            # resnet/butd: 2048, vgg: 512
            self.raw_img_feat_size = 2048 if 'resnet' in opt.img_ext_model or 'butd' in opt.img_ext_model else 512
            self.linear_img = nn.Linear(self.raw_img_feat_size,
                                        self.bi_hidden_size)
            # single-attention
            if 'text_img_att' in self.cur_model:
                self.text_img_att = Attention(self.bi_hidden_size,
                                              self.bi_hidden_size)
            if 'img_text_att' in self.cur_model:
                self.img_text_att = Attention(self.bi_hidden_size,
                                              self.bi_hidden_size,
                                              need_mask=True)
            if 'text_img_add_text_att' in self.cur_model:
                self.text_img_add_text_att = Attention(2 * self.bi_hidden_size,
                                                       self.bi_hidden_size,
                                                       need_mask=True)

            if 'text_img_co_att' in self.cur_model:
                self.text_img_co_att = CoAttention(self.bi_hidden_size,
                                                   self.bi_hidden_size,
                                                   input_type='text_img')
            if 'img_text_co_att' in self.cur_model:
                self.img_text_co_att = CoAttention(self.bi_hidden_size,
                                                   self.bi_hidden_size,
                                                   input_type='img_text')

        # co-attention
        if 'multi_head_att' in self.cur_model:
            # e.g. ['img_text_multi_head_att_h4_d256', 'text_img_multi_head_att_h4_d256']
            # The head number and the subspace dimension are hard-coded into the model name:
            # 'img_text_multi_head_att_h1_d128' ==> heads: 1, dim: 128

            # default setting
            self.is_regu = True if 'regu' in self.cur_model else False
            n_head, d_kv, stack_num = get_multi_head_att_paras(self.cur_model)

            if 'img_text_multi_head_att' in self.cur_model:
                self.img_text_multi_head_att = nn.ModuleList([
                    MyMultiHeadAttention(n_head,
                                         self.bi_hidden_size,
                                         d_kv,
                                         dropout=opt.dropout,
                                         need_mask=True,
                                         is_regu=self.is_regu)
                    for _ in range(stack_num)
                ])
            elif 'text_img_multi_head_att' in self.cur_model:
                self.text_img_multi_head_att = nn.ModuleList([
                    MyMultiHeadAttention(n_head,
                                         self.bi_hidden_size,
                                         d_kv,
                                         dropout=opt.dropout,
                                         need_mask=False)
                    for _ in range(stack_num)
                ])
            elif 'attr_text_multi_head_att' in self.cur_model:
                self.attr_text_multi_head_att = nn.ModuleList([
                    MyMultiHeadAttention(n_head,
                                         self.bi_hidden_size,
                                         d_kv,
                                         dropout=opt.dropout,
                                         need_mask=True)
                    for _ in range(stack_num)
                ])
            elif 'img_attr_add_text_multi_head_att' in self.cur_model:
                self.img_attr_add_text_multi_head_att = nn.ModuleList([
                    MyMultiHeadAttention(n_head,
                                         self.bi_hidden_size,
                                         d_kv,
                                         dropout=opt.dropout,
                                         need_mask=True)
                    for _ in range(stack_num)
                ])
            elif 'img_attr_sep_text_multi_head_att' in self.cur_model:
                self.img_sep_text_multi_head_att = nn.ModuleList([
                    MyMultiHeadAttention(n_head,
                                         self.bi_hidden_size,
                                         d_kv,
                                         dropout=opt.dropout,
                                         need_mask=True)
                    for _ in range(stack_num)
                ])
                self.attr_sep_text_multi_head_att = nn.ModuleList([
                    MyMultiHeadAttention(n_head,
                                         self.bi_hidden_size,
                                         d_kv,
                                         dropout=opt.dropout,
                                         need_mask=True)
                    for _ in range(stack_num)
                ])
            elif 'text_text_multi_head_att' in self.cur_model:
                self.text_text_multi_head_att = nn.ModuleList([
                    MyMultiHeadAttention(n_head,
                                         self.bi_hidden_size,
                                         d_kv,
                                         dropout=opt.dropout,
                                         need_mask=True)
                    for _ in range(stack_num)
                ])
            else:
                raise NotImplementedError

        if self.use_attr:
            self.linear_attr = nn.Linear(opt.emb_size, self.bi_hidden_size)
            if 'text_attr_att' in self.cur_model:
                self.text_attr_att = Attention(self.bi_hidden_size,
                                               self.bi_hidden_size)
            if 'attr_text_att' in self.cur_model:
                self.attr_text_att = Attention(self.bi_hidden_size,
                                               self.bi_hidden_size,
                                               need_mask=True)
            if 'text_attr_add_text_att' in self.cur_model:
                self.text_attr_add_text_att = Attention(2 *
                                                        self.bi_hidden_size,
                                                        self.bi_hidden_size,
                                                        need_mask=True)
            if 'text_attr_real_add_text_att' in self.cur_model:
                self.text_attr_real_add_text_att = Attention(
                    self.bi_hidden_size, self.bi_hidden_size, need_mask=True)
            elif 'text_attr_co_att' in self.cur_model:
                self.text_attr_co_att = CoAttention(self.bi_hidden_size,
                                                    self.bi_hidden_size,
                                                    input_type='text_img')
            elif 'attr_text_co_att' in self.cur_model:
                self.attr_text_co_att = CoAttention(self.bi_hidden_size,
                                                    self.bi_hidden_size,
                                                    input_type='img_text')

        self.dropout = nn.Dropout(p=opt.dropout)
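A sketch of an opt namespace carrying the fields MultimodalEncoder reads above; every value, including the cur_model string and emb_type, is a placeholder rather than a setting from the project.

from types import SimpleNamespace

opt = SimpleNamespace(
    data_path='data/', emb_path='emb/', emb_type='random',  # emb_type value is a guess
    vocab_size=50000, emb_size=200, pad_idx=0,
    hidden_size=150, num_layers=2, bidirectional=True, dropout=0.1,
    cur_model='text_img_att', use_text=True, use_img=True, use_attr=False,
    img_ext_model='resnet')

encoder = MultimodalEncoder(opt)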