Example #1

class DualTransformerForMaskedAcousticModel(TransformerInitModel):  # base class assumed; the original snippet omits the class declaration
    def __init__(self,
                 config,
                 input_dim,
                 output_dim,
                 output_attentions=False,
                 keep_multihead_output=False):
        super(DualTransformerForMaskedAcousticModel,
              self).__init__(config, output_attentions)

        assert config.dual_transformer, 'This config attribute should be set to True!'
        self.use_pe = config.intermediate_pe
        self.combine = config.combine
        self.phone_dim = config.phone_dim
        self.speaker_dim = config.speaker_dim
        if config.combine == 'concat':
            code_dim = self.phone_dim + self.speaker_dim
        elif config.combine == 'add':
            assert self.phone_dim == self.speaker_dim
            code_dim = self.phone_dim
        else:
            raise NotImplementedError

        self.SPE = nn.Parameter(
            torch.FloatTensor([1.0])
        )  # Scaled positional encoding (SPE) introduced in https://arxiv.org/abs/1809.08895
        self.SpecTransformer = TransformerModel(
            config,
            input_dim=code_dim,
            output_attentions=output_attentions,
            keep_multihead_output=keep_multihead_output,
            with_input_module=True if self.use_pe else False)
        self.SpecHead = TransformerSpecPredictionHead(
            config, output_dim if output_dim is not None else input_dim)

        if self.phone_dim > 0:
            self.PhoneticTransformer = TransformerPhoneticEncoder(
                config, input_dim, output_attentions, keep_multihead_output)
        if self.speaker_dim > 0:
            self.SpeakerTransformer = TransformerSpeakerEncoder(
                config, input_dim, output_attentions, keep_multihead_output)

        if len(config.pre_train) > 0:
            all_states = torch.load(config.pre_train, map_location='cpu')
            if self.phone_dim > 0:
                self.PhoneticTransformer.Transformer = load_model(
                    self.PhoneticTransformer.Transformer,
                    all_states['Transformer'])
            if self.speaker_dim > 0:
                self.SpeakerTransformer.Transformer = load_model(
                    self.SpeakerTransformer.Transformer,
                    all_states['Transformer'])

        self.apply(self.init_Transformer_weights)
        self.loss = nn.L1Loss()
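
The 'combine' setting above decides the width of the code vector fed into the SpecTransformer: 'concat' stacks the phonetic and speaker codes feature-wise, while 'add' sums them and therefore requires the two dimensions to match. A minimal sketch of that rule, as a hypothetical standalone helper (not part of the original class):

def combined_code_dim(combine, phone_dim, speaker_dim):
    # Hypothetical helper mirroring the dimension logic of the constructor above.
    if combine == 'concat':
        return phone_dim + speaker_dim  # codes are concatenated feature-wise
    elif combine == 'add':
        assert phone_dim == speaker_dim, 'element-wise addition needs matching dims'
        return phone_dim  # codes are summed, width unchanged
    raise NotImplementedError(combine)

# e.g. combined_code_dim('concat', 256, 128) == 384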
class SPEC_TRANSFORMER(TRANSFORMER):
    def __init__(self, options, inp_dim, config=None, online_config=None):
        super(SPEC_TRANSFORMER, self).__init__(options, inp_dim, config,
                                               online_config)

        # build head
        self.SpecHead = TransformerSpecPredictionHead(self.model_config,
                                                      inp_dim).to(self.device)
        if self.no_grad:
            self.SpecHead.eval()
        else:
            self.SpecHead.train()

        # Load from a PyTorch state_dict
        if self.load:
            self.SpecHead.load_state_dict(self.all_states['SpecHead'])
            print('[Spec Transformer] - Number of parameters: ' + str(
                sum(p.numel()
                    for p in self.SpecHead.parameters() if p.requires_grad)))

    def forward(self, x):
        # Optional on-the-fly preprocessing: swap the last two dimensions before
        # handing the batch to the preprocessor, then keep its first output.
        if hasattr(self, 'preprocessor'):
            x = self.preprocessor(x.transpose(1, 2).contiguous())[0]
        if self.no_grad:
            with torch.no_grad():
                x = self._forward(x)  # Transformer encoder hidden states
                x, _ = self.SpecHead(x)  # project hidden states to predicted spectrogram frames
        else:
            x = self._forward(x)
            x, _ = self.SpecHead(x)
        return x
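
A hedged usage sketch of the wrapper above, assuming 80-dimensional input features and an options dict accepted by the TRANSFORMER base class; the option keys and checkpoint path below are illustrative only and are not taken from the original code:

import torch

options = {                                     # hypothetical keys, defined by the TRANSFORMER base class
    'ckpt_file': '/path/to/checkpoint.ckpt',    # illustrative path
    'load_pretrain': 'True',
    'no_grad': 'True',
    'dropout': 'default',
}
model = SPEC_TRANSFORMER(options, inp_dim=80)   # 80-dim log-Mel features, assumed

feats = torch.randn(4, 1200, 80)                # (batch, time, feature)
with torch.no_grad():
    predicted_spec = model(feats)               # SpecHead output: reconstructed spectrogram frames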