Example n. 1
0
    def __init__(self, opt):
        """Build the captioning model from the option namespace ``opt``.

        Copies hyper-parameters onto the instance, then constructs the
        optional review network, the core LSTM, the word embeddings and the
        (optional) linear feature projections.

        Args:
            opt: configuration object carrying vocab/feature sizes, RNN
                settings, etc.  NOTE: when ``opt.use_reviewnet == 1`` this
                constructor mutates ``opt.att_size`` in place (sets it to
                ``opt.review_length``) before building the core LSTM.
        """
        super(MoreSupWeightModel, self).__init__()
        self.vocab_size = opt.vocab_size
        self.input_encoding_size = opt.input_encoding_size
        self.rnn_type = opt.rnn_type
        self.rnn_size = opt.rnn_size
        self.num_layers = opt.num_layers
        self.drop_prob_lm = opt.drop_prob_lm
        self.seq_length = opt.seq_length
        self.fc_feat_size = opt.fc_feat_size
        self.att_feat_size = opt.att_feat_size
        self.att_size = opt.att_size
        # effective batch: each image contributes seq_per_img captions
        self.batch_size = opt.batch_size * opt.seq_per_img
        self.rnn_atten = opt.rnn_atten
        self.num_parallels = opt.num_parallels
        self.sample_rate = opt.sample_rate
        self.use_linear = opt.use_linear
        self.rnn_size_list = opt.rnn_size_list
        self.gram_num = opt.gram_num
        self.logprob_pool_type = opt.logprob_pool_type  # 0 -> mean, 1 -> max

        # Review network (optional): a stack of soft-attention LSTM steps
        # whose outputs replace the raw attention features.
        self.use_reviewnet = opt.use_reviewnet
        if self.use_reviewnet == 1:
            self.review_length = opt.review_length
            self.review_nets = nn.ModuleList()
            for _ in range(self.review_length):
                # BUG FIX: the original did ``self.review_nets[i] = ...``,
                # which raises IndexError on an empty ModuleList; append()
                # both grows the list and registers the sub-module.
                self.review_nets.append(
                    LSTM.LSTM_SOFT_ATT_NOX(self.rnn_size, self.att_size,
                                           self.drop_prob_lm))
            # The core LSTM now attends over review_length steps.
            opt.att_size = self.review_length

        # Core LSTM (built after any opt.att_size override above).
        self.core = rnn_utils.get_lstm(opt)

        if self.rnn_atten == "ATT_LSTM":
            self.atten = LSTM.LSTM_ATTEN_LAYER(self.rnn_size)

        # Word embedding: (vocab_size + 1) indices -> input_encoding_size,
        # optionally followed by an n-gram composition layer.
        if self.gram_num > 0:
            self.embed = nn.Sequential(
                nn.Embedding(self.vocab_size + 1, self.input_encoding_size),
                Embed.WordEmbed(self.gram_num))
        else:
            self.embed = nn.Embedding(self.vocab_size + 1, self.input_encoding_size)

        # Separate embedding table (teacher/target path).
        self.embed_tc = nn.Embedding(self.vocab_size + 1, self.input_encoding_size)

        if self.use_linear:
            # Project image features into the word-embedding space:
            # (batch_size, fc_feat_size) -> (batch_size, input_encoding_size)
            self.img_embed = nn.Linear(self.fc_feat_size, self.input_encoding_size)
            self.att_embed = nn.Linear(self.att_feat_size, self.input_encoding_size)

            self.relu = nn.ReLU()
            # NOTE(review): init_weight() is only invoked on the use_linear
            # path, as in the original — confirm this is intentional.
            self.init_weight()
Example n. 2
0
    def __init__(self, opt):
        """Build the double-attention captioning model from ``opt``.

        Copies hyper-parameters onto the instance, then constructs the
        optional review network, the core LSTM, the word embedding and the
        (optional) linear feature projections.

        Args:
            opt: configuration object carrying vocab/feature sizes, RNN
                settings, etc.  NOTE: when ``opt.use_reviewnet == 1`` this
                constructor mutates ``opt.att_size`` in place (sets it to
                ``opt.review_length``) before building the core LSTM.
        """
        super(DoubleAttenMModel, self).__init__()
        self.vocab_size = opt.vocab_size
        self.input_encoding_size = opt.input_encoding_size
        self.rnn_type = opt.rnn_type
        self.rnn_size = opt.rnn_size
        # (duplicate ``self.num_layers = opt.num_layers`` removed)
        self.num_layers = opt.num_layers
        self.drop_prob_lm = opt.drop_prob_lm
        self.seq_length = opt.seq_length
        self.fc_feat_size = opt.fc_feat_size
        self.att_feat_size = opt.att_feat_size
        self.att_size = opt.att_size
        # effective batch: each image contributes seq_per_img captions
        self.batch_size = opt.batch_size * opt.seq_per_img
        self.rnn_atten = opt.rnn_atten
        self.num_parallels = opt.num_parallels
        self.sample_rate = opt.sample_rate
        self.use_linear = opt.use_linear
        self.rnn_size_list = opt.rnn_size_list

        # Review network (optional): a stack of soft-attention LSTM steps
        # whose outputs replace the raw attention features.
        self.use_reviewnet = opt.use_reviewnet
        if self.use_reviewnet == 1:
            self.review_length = opt.review_length
            self.review_nets = nn.ModuleList()
            for _ in range(self.review_length):
                # BUG FIX: the original did ``self.review_nets[i] = ...``,
                # which raises IndexError on an empty ModuleList; append()
                # both grows the list and registers the sub-module.
                self.review_nets.append(
                    LSTM.LSTM_SOFT_ATT_NOX(self.rnn_size, self.att_size,
                                           self.drop_prob_lm))
            # The core LSTM now attends over review_length steps.
            opt.att_size = self.review_length

        # Core LSTM (built after any opt.att_size override above).
        self.core = rnn_utils.get_lstm(opt)

        if self.rnn_atten == "ATT_LSTM":
            self.atten = LSTM.LSTM_ATTEN_LAYER(self.rnn_size)

        # Word embedding: (vocab_size + 1) indices -> input_encoding_size.
        self.embed = nn.Embedding(self.vocab_size + 1,
                                  self.input_encoding_size)

        if self.use_linear:
            # Project image features into the RNN hidden space:
            # (batch_size, fc_feat_size) -> (batch_size, rnn_size)
            self.img_embed = nn.Linear(self.fc_feat_size, self.rnn_size)
            self.att_embed = nn.Linear(self.att_feat_size, self.rnn_size)

            self.relu = nn.ReLU()
            # NOTE(review): init_weight() is only invoked on the use_linear
            # path, as in the original — confirm this is intentional.
            self.init_weight()