Example #1
    def forward(self, data, target, *mems):
        # nn.DataParallel does not allow size(0) tensors to be broadcasted.
        # So, have to initialize size(0) mems inside the model forward.
        # Moreover, have to return new_mems to allow nn.DataParallel to piece
        # them together.
        if not mems:
            mems = self.init_mems()

        tgt_len = target.size(0)
        # print(f'data = {data.shape}')
        hidden, new_mems = self._forward(data, mems=mems)
        pred_hid = hidden[-tgt_len:]    # the predictions; they reuse the cached memory. hidden shape: (mems + tgt_len) x 4 x 200, i.e. 36 x 4 x 200
        
        # sample_softmax: perform softmax over n randomly sampled words instead of the full vocabulary
        if self.sample_softmax > 0 and self.training:
            # self.tie_weight controls whether the word-embedding weights are tied (shared); self.out_layer is the linear layer that maps hidden states to vocabulary logits
            assert self.tie_weight
            logit = sample_logits(self.word_emb, self.out_layer.bias, target, pred_hid,
                                  self.sampler)
            loss = -F.log_softmax(logit, -1)[:, :, 0]
        else:
            # print(f'pred_hid shape = {pred_hid.shape}')
            # pred_hid.view(-1, pred_hid.size(-1)): 144 x 200, target.view(-1): 144
            loss = self.crit(pred_hid.view(-1, pred_hid.size(-1)), target.view(-1))
            # print(f'loss = {loss.shape}')
            loss = loss.view(tgt_len, -1)    # 36 x 4

        if new_mems is None:
            return [loss]
        else:
            return [loss] + new_mems
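
For context, a minimal sketch of how a caller might thread the returned memories back into the next call of this forward; train_segments, model, train_iter, and optimizer are illustrative names and not part of the example above:

def train_segments(model, train_iter, optimizer):
    """Hypothetical helper: thread the recurrence memory across segments."""
    mems = tuple()                        # empty on the first segment; forward() then calls init_mems()
    for data, target in train_iter:       # each item assumed to be a pair of (seq_len x batch) LongTensors
        ret = model(data, target, *mems)  # returns [loss] + new_mems (see forward above)
        loss, mems = ret[0], ret[1:]
        loss.float().mean().backward()    # loss is per-token, shape (tgt_len, batch)
        optimizer.step()
        optimizer.zero_grad()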
Example #2
    def forward(self, data, target, *mems):
        # nn.DataParallel does not allow size(0) tensors to be broadcasted.
        # So, have to initialize size(0) mems inside the model forward.
        # Moreover, have to return new_mems to allow nn.DataParallel to piece
        # them together.
        if not mems: mems = self.init_mems()

        tgt_len = target.size(0)
        hidden, new_mems = self._forward(data, mems=mems)

        pred_hid = hidden[-tgt_len:]
        if self.sample_softmax > 0 and self.training:
            assert self.tie_weight
            logit = sample_logits(self.word_emb, self.out_layer.bias, target,
                                  pred_hid, self.sampler)
            loss = -F.log_softmax(logit, -1)[:, :, 0]
        else:
            loss = self.crit(pred_hid.view(-1, pred_hid.size(-1)),
                             target.view(-1))
            loss = loss.view(tgt_len, -1)

        if new_mems is None:
            return [loss]
        else:
            return [loss] + new_mems
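
The slicing and reshaping above are pure shape bookkeeping. A toy sketch (all sizes made up) of what hidden[-tgt_len:] and loss.view(tgt_len, -1) do:

import torch

mem_len, tgt_len, bsz, d_model = 20, 16, 4, 200
hidden = torch.randn(mem_len + tgt_len, bsz, d_model)  # time-major: steps x batch x model dim
pred_hid = hidden[-tgt_len:]                           # keep only the positions being predicted
assert pred_hid.shape == (tgt_len, bsz, d_model)

flat_loss = torch.randn(tgt_len * bsz)                 # stand-in for the criterion's flattened output
per_token = flat_loss.view(tgt_len, -1)                # back to (tgt_len, batch)
assert per_token.shape == (tgt_len, bsz)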
Example #3
    def forward(self, data, target, mems):
        # nn.DataParallel does not allow size(0) tensors to be broadcasted.
        # So, have to initialize size(0) mems inside the model forward.
        # Moreover, have to return new_mems to allow nn.DataParallel to piece
        # them together.

        if mems[0] == ():
            mems_real = self.init_mems()
            mems_phase = self.init_mems()
        else:
            mems_real, mems_phase = mems

        tgt_len = target.size(0)
        hidden, hidden_phase, new_mems, new_mems_phase = self._forward(
            data, mems=mems_real, mems_phase=mems_phase)
        norms = (torch.sqrt(
            torch.mul(hidden, hidden) +
            torch.mul(hidden_phase, hidden_phase))) / 1.5
        pred_hid = norms[-tgt_len:]
        if self.sample_softmax > 0 and self.training:
            assert self.tie_weight
            logit = sample_logits(self.word_emb, self.out_layer.bias, target,
                                  pred_hid, self.sampler)
            loss = -F.log_softmax(logit, -1)[:, :, 0]
        else:
            loss = self.crit(pred_hid.view(-1, pred_hid.size(-1)),
                             target.view(-1))
            loss = loss.view(tgt_len, -1)

        if new_mems is None:
            return [loss]
        else:
            return loss, new_mems, new_mems_phase
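
In this variant the real and "phase" hidden states are combined like the two components of a complex number before scoring. A standalone sketch of that magnitude computation, with illustrative shapes:

import torch

def combine_magnitude(hidden, hidden_phase, scale=1.5):
    """Elementwise sqrt(hidden^2 + hidden_phase^2) / scale, as in the forward above."""
    return torch.sqrt(hidden * hidden + hidden_phase * hidden_phase) / scale

h_real = torch.randn(36, 4, 200)    # illustrative shapes only
h_phase = torch.randn(36, 4, 200)
norms = combine_magnitude(h_real, h_phase)
assert norms.shape == h_real.shape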
Example #4
    def forward(self, data, target, *mems, model_type="training"):
        # nn.DataParallel does not allow size(0) tensors to be broadcasted.
        # So, have to initialize size(0) mems inside the model forward.
        # Moreover, have to return new_mems to allow nn.DataParallel to piece
        # them together.
        if not mems: mems = self.init_mems()

        tgt_len = target.size(0)
        hidden, new_mems = self._forward(data, mems=mems)
        pred_hid = hidden[-tgt_len:]

        #print("### self.sample_softmax ###",self.sample_softmax)
        if self.sample_softmax > 0 and model_type == "training":
            #if self.sample_softmax > 0 and self.training:
            assert self.tie_weight
            logit = sample_logits(self.word_emb, self.out_layer.bias, target,
                                  pred_hid, self.sampler)
            loss = -F.log_softmax(logit, -1)[:, :, 0]
        elif model_type == "inferrence":
            output = self.crit(pred_hid.view(-1, pred_hid.size(-1)), target,
                               False, model_type)
            #outputTest=output.view(tgt_len,8,-1)
            #loss = loss.view(tgt_len, -1)
            return [output] + new_mems
        else:
            loss, output = self.crit(pred_hid.view(-1, pred_hid.size(-1)),
                                     target.view(-1))
            outputTest = output.view(tgt_len, 1, -1)
            loss = loss.view(tgt_len, -1)

        if new_mems is None:
            return [loss] + [output]
        else:
            return [loss] + [output] + new_mems
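
Because the return value of this variant changes with model_type, a caller has to unpack it differently per branch. A hypothetical wrapper (run_step is an assumed name) illustrating that:

def run_step(model, data, target, mems, model_type="training"):
    """Hypothetical caller: unpack the return value per model_type branch."""
    ret = model(data, target, *mems, model_type=model_type)
    if model_type == "inferrence":              # spelling matches the string checked in forward()
        output, new_mems = ret[0], ret[1:]      # this branch returns [output] + new_mems, no loss
        return output, new_mems
    loss, output, new_mems = ret[0], ret[1], ret[2:]
    return loss, output, new_mems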
Example #5
    def forward(self, data, target, *mems, use_dropout=True, reg_args=None):
        # nn.DataParallel does not allow size(0) tensors to be broadcasted.
        # So, have to initialize size(0) mems inside the model forward.
        # Moreover, have to return new_mems to allow nn.DataParallel to piece
        # them together.
        drop_list = []
        if not mems: mems = self.init_mems()

        tgt_len = target.size(0)

        if reg_args is None:
            ret_dropped = False
            sample_logit = False
        else:
            ret_dropped = reg_args['exp_reg_type'] != 'none' or reg_args[
                'imp_reg_type'] != 'none'
            sample_logit = reg_args['exp_reg_type'].split(
                '+')[0] == 'jreg_sample_logit'
        if ret_dropped:
            hidden, new_mems, drop_list = self._forward(
                data, mems=mems, use_dropout=use_dropout, ret_dropped=True)
        else:
            hidden, new_mems = self._forward(data,
                                             mems=mems,
                                             use_dropout=use_dropout,
                                             ret_dropped=False)

        pred_hid = hidden[-tgt_len:]
        if self.sample_softmax > 0 and self.training:
            if sample_logit:
                raise NotImplementedError
            assert self.tie_weight
            logit = sample_logits(self.word_emb, self.out_layer.bias, target,
                                  pred_hid, self.sampler)
            loss = -F.log_softmax(logit, -1)[:, :, 0]
            fake_loss = None
        else:
            loss_ret = self.crit(pred_hid.view(-1, pred_hid.size(-1)),
                                 target.view(-1),
                                 sample_losses=sample_logit)
            loss = loss_ret if not sample_logit else loss_ret[0]
            loss = loss.view(tgt_len, -1)
            fake_loss = None if not sample_logit else loss_ret[1].view(-1)

        reg_arr = []

        if reg_args is not None:
            exp_reg = compute_exp_reg(loss, fake_loss, pred_hid, drop_list,
                                      reg_args)
            imp_reg = compute_imp_reg(loss, pred_hid, drop_list, reg_args)

            if reg_args['exp_reg_type'] != 'none': reg_arr.append(exp_reg)
            if reg_args['imp_reg_type'] != 'none': reg_arr.append(imp_reg)

        if new_mems is None:
            return [loss] + reg_arr
        else:
            return [loss] + reg_arr + new_mems
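
With reg_args set, this variant returns [loss] + reg_arr + new_mems, so the caller must know how many regularization terms were enabled before it can split off the memories. A hypothetical unpacking helper; the dict keys mirror those read in the forward above, everything else is assumed:

def run_regularized_step(model, data, target, mems, reg_args=None):
    """Hypothetical caller: split the [loss] + reg_arr + new_mems return value."""
    if reg_args is None:
        reg_args = {'exp_reg_type': 'none', 'imp_reg_type': 'none'}
    # one regularization term is appended per type that is not 'none'
    n_reg = sum(reg_args[k] != 'none' for k in ('exp_reg_type', 'imp_reg_type'))
    ret = model(data, target, *mems, use_dropout=True, reg_args=reg_args)
    loss, reg_terms, new_mems = ret[0], ret[1:1 + n_reg], ret[1 + n_reg:]
    return loss, reg_terms, new_mems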