def __init__(self, options, inp_dim):
    super(MOCKINGJAY, self).__init__()

    all_states = torch.load(options["ckpt_file"], map_location='cpu')
    self.config = all_states['Settings']['Config']
    self.no_grad = bool(strtobool(options["no_grad"]))

    # increase dropout
    if str(options['dropout']) != 'default':
        self.config['mockingjay']['hidden_dropout_prob'] = float(options['dropout'])
        self.config['mockingjay']['attention_probs_dropout_prob'] = float(options['dropout'])

    # Model Config
    self.model_config = MockingjayConfig(self.config)
    self.dr = self.model_config.downsample_rate
    self.hidden_size = self.model_config.hidden_size

    # Build model
    self.device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    self.model = MockingjayModel(self.model_config, inp_dim).to(self.device)

    # Load from a PyTorch state_dict
    load = bool(strtobool(options["load_pretrain"]))
    if load:
        self.load_model(all_states['Mockingjay'])
        print('[Mockingjay] - Number of parameters: ' + str(
            sum(p.numel() for p in self.model.parameters() if p.requires_grad)))

    self.out_dim = 768  # This attribute is for pytorch-kaldi
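# --- Hedged sketch (not part of the original file): how this wrapper might be
# instantiated. Option values are strings because they pass through strtobool()
# and float() above; 'path/to/states.ckpt' and inp_dim=160 are hypothetical.
options = {
    'ckpt_file': 'path/to/states.ckpt',  # hypothetical checkpoint path
    'no_grad': 'True',                   # freeze the upstream model
    'dropout': 'default',                # keep the dropout values stored in the checkpoint
    'load_pretrain': 'True',             # restore the pretrained state_dict
}
upstream = MOCKINGJAY(options, inp_dim=160)  # inp_dim: acoustic feature dimension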
def set_model(self, inference=False, with_head=False, from_path=None, output_attention=False):
    self.verbose('Initializing Mockingjay model.')

    # Build the Mockingjay model with speech prediction head
    self.model_config = MockingjayConfig(self.config)
    self.dr = self.model_config.downsample_rate
    self.hidden_size = self.model_config.hidden_size
    self.output_attention = output_attention

    if not inference or with_head:
        self.model = MockingjayForMaskedAcousticModel(
            self.model_config, self.input_dim, self.output_dim, self.output_attention).to(self.device)
        self.verbose('Number of parameters: ' + str(
            sum(p.numel() for p in self.model.parameters() if p.requires_grad)))
        self.mockingjay = self.model.Mockingjay

    if inference and not with_head:
        self.mockingjay = MockingjayModel(
            self.model_config, self.input_dim, self.output_attention).to(self.device)
        self.verbose('Number of parameters: ' + str(
            sum(p.numel() for p in self.mockingjay.parameters() if p.requires_grad)))
        self.mockingjay.eval()
    elif inference and with_head:
        self.model.eval()
    elif not inference:
        self.model.train()

        # Setup optimizer
        param_optimizer = list(self.model.named_parameters())

        no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [
            {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
             'weight_decay': 0.01},
            {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
             'weight_decay': 0.0}
        ]
        num_train_optimization_steps = self.total_steps // self.gradient_accumulation_steps

        if self.apex:
            try:
                from apex.optimizers import FP16_Optimizer
                from apex.optimizers import FusedAdam
            except ImportError:
                raise ImportError("Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training.")

            optimizer = FusedAdam(optimizer_grouped_parameters,
                                  lr=self.learning_rate,
                                  bias_correction=False,
                                  max_grad_norm=1.0)
            if self.config['optimizer']['loss_scale'] == 0:
                self.optimizer = FP16_Optimizer(optimizer, dynamic_loss_scale=True)
            else:
                self.optimizer = FP16_Optimizer(
                    optimizer, static_loss_scale=self.config['optimizer']['loss_scale'])
            self.warmup_linear = WarmupLinearSchedule(warmup=self.warmup_proportion,
                                                      t_total=num_train_optimization_steps)
        else:
            self.optimizer = BertAdam(optimizer_grouped_parameters,
                                      lr=self.learning_rate,
                                      warmup=self.warmup_proportion,
                                      t_total=num_train_optimization_steps)
    else:
        raise NotImplementedError('Invalid Arguments!')

    if self.load:  # This will be set to True by default when Tester is running set_model()
        self.load_model(inference=inference, with_head=with_head, from_path=from_path)
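# --- Hedged sketch (not part of the original file): the BERT-style
# "no weight decay on bias / LayerNorm" grouping used in set_model() above,
# isolated so it can run on its own. The Toy module and torch.optim.AdamW
# stand in for the real Mockingjay model and BertAdam; all names below are
# illustrative only.
import torch
import torch.nn as nn

class Toy(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(8, 8)     # 'linear.weight' decays; 'linear.bias' does not
        self.LayerNorm = nn.LayerNorm(8)  # neither LayerNorm parameter decays

toy = Toy()
no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
grouped = [
    {'params': [p for n, p in toy.named_parameters()
                if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
    {'params': [p for n, p in toy.named_parameters()
                if any(nd in n for nd in no_decay)], 'weight_decay': 0.0},
]
optimizer = torch.optim.AdamW(grouped, lr=2e-4)  # AdamW as a stand-in for BertAdam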
def __init__(self, options, inp_dim):
    super(MOCKINGJAY, self).__init__()

    all_states = torch.load(options["ckpt_file"], map_location='cpu')
    self.config = all_states['Settings']['Config']
    self.no_grad = bool(strtobool(options['no_grad']))
    self.spec_aug = bool(strtobool(options['spec_aug']))
    self.spec_aug_prev = bool(strtobool(options['spec_aug_prev']))
    self.weighted_sum = bool(strtobool(options['weighted_sum']))
    self.select_layer = int(options['select_layer'])
    if (not self.no_grad) and (not self.spec_aug_prev):
        raise RuntimeError("Only one of 'no_grad' and 'spec_aug_prev' can be set to False!")

    # increase dropout
    if str(options['dropout']) != 'default':
        self.config['mockingjay']['hidden_dropout_prob'] = float(options['dropout'])
        self.config['mockingjay']['attention_probs_dropout_prob'] = float(options['dropout'])

    # Model Config
    self.model_config = MockingjayConfig(self.config)
    self.dr = self.model_config.downsample_rate
    self.hidden_size = self.model_config.hidden_size
    self.num_layers = self.model_config.num_hidden_layers
    if self.select_layer not in range(-1, self.num_layers):
        raise RuntimeError("Out of range int for 'select_layer'!")

    # use weighted sum from all layers
    if self.weighted_sum:
        self.weight = nn.Parameter(torch.ones(self.num_layers) / self.num_layers)

    # Build model
    self.device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    self.model = MockingjayModel(self.model_config, inp_dim).to(self.device)
    self.model.eval() if self.no_grad else self.model.train()

    # Load from a PyTorch state_dict
    load = bool(strtobool(options["load_pretrain"]))
    if load:
        self.load_model(all_states['Mockingjay'])
        print('[Mockingjay] - Number of parameters: ' + str(
            sum(p.numel() for p in self.model.parameters() if p.requires_grad)))

    self.out_dim = self.hidden_size  # 768, This attribute is for pytorch-kaldi
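# --- Hedged sketch (not part of the original file): one plausible way the
# learnable per-layer weights initialized above could combine hidden states
# from all encoder layers. The softmax normalization is an assumption; the
# real forward pass may differ. All dimensions below are toy values.
import torch
import torch.nn.functional as F

num_layers, seq_len, hidden_size = 3, 5, 8
layer_states = torch.randn(num_layers, seq_len, hidden_size)  # stacked outputs of all layers
weight = torch.ones(num_layers) / num_layers                  # matches the init above
norm_w = F.softmax(weight, dim=-1)                            # assumption: softmax-normalized
weighted = (norm_w.view(-1, 1, 1) * layer_states).sum(dim=0)  # -> (seq_len, hidden_size)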