def __init__(self, args):
    super().__init__(args)

    # Select the dataset directory and the number of intent classes
    if args.dataset == 'fsc':
        data_dir = 'fluent'
        num_classes = 31
    elif args.dataset == 'snips':
        data_dir = 'snips_slu'
        num_classes = 6
    else:
        raise ValueError("No valid dataset selected!")

    # Define the joint model
    self.model = JointModel(
        input_dim=40,
        num_layers=args.num_enc_layers,
        num_classes=num_classes,
        encoder_dim=args.enc_dim,
        bert_pretrained=not args.bert_random_init,
        bert_pretrained_model_name=args.bert_model_name)
    print(self.model)

    # Set the device and distributed settings
    self.device = torch.device(
        'cuda' if torch.cuda.is_available() else 'cpu')
    if args.distributed and torch.cuda.is_available() \
            and torch.cuda.device_count() > 1:
        self.model = torch.nn.DataParallel(self.model)
    self.model.to(self.device)

    # Define the data loaders
    self.train_loader, \
        self.val_loader, \
        self.test_loader = get_triplet_dataloaders(
            data_root=data_dir,
            batch_size=args.batch_size,
            dataset=args.dataset,
            num_workers=args.num_workers,
            pretrained_model_name=args.bert_model_name)

    # Define the optimizer. DataParallel hides submodules behind
    # .module, so unwrap before building the parameter groups;
    # accessing self.model.bert directly would raise AttributeError
    # when the model is wrapped.
    base_model = (self.model.module
                  if isinstance(self.model, torch.nn.DataParallel)
                  else self.model)
    self.optimizer = torch.optim.Adam(
        [{
            'params': base_model.bert.parameters(),
            'lr': args.learning_rate_bert
        }, {
            'params': base_model.speech_encoder.parameters()
        }, {
            'params': base_model.classifier.parameters()
        }],
        lr=args.learning_rate)
    self._init_scheduler(args)

    # Parameters for the losses
    self.weight_text = args.weight_text
    self.weight_embedding = args.weight_embedding
    self.margin = args.margin
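# Usage sketch (illustrative, not from the source): how this constructor
# might be driven from argparse-style arguments. `TripletTrainer` is a
# hypothetical name for the enclosing class, and every value below is an
# assumed placeholder rather than a default taken from the repository.
#
#   from types import SimpleNamespace
#
#   args = SimpleNamespace(
#       dataset='fsc',                        # 'fsc' or 'snips'
#       num_enc_layers=4, enc_dim=256,        # assumed sizes
#       bert_random_init=False,
#       bert_model_name='bert-base-uncased',
#       distributed=False,
#       batch_size=32, num_workers=4,
#       learning_rate=1e-3, learning_rate_bert=1e-5,
#       weight_text=1.0, weight_embedding=1.0, margin=0.5)
#   trainer = TripletTrainer(args)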
def __init__(self, args):
    # Get the dataset directory
    if args.data_path:
        data_dir = args.data_path
    else:
        raise ValueError("No data path was given!")

    # Set the number of intent classes per dataset; SLURP reuses the
    # snips-style data pipeline, so its dataset name is remapped.
    if args.dataset == 'fsc':
        num_classes = 31
    elif args.dataset == 'snips':
        num_classes = 6
    elif args.dataset == 'slurp':
        num_classes = 91
        args.dataset = 'snips'
    else:
        raise ValueError("No valid dataset selected!")
    print('Dataset: ', args.dataset)

    # Define the joint model
    self.model = JointModel(
        input_dim=40,
        num_layers=args.num_enc_layers,
        num_classes=num_classes,
        encoder_dim=args.enc_dim,
        bert_pretrained=not args.bert_random_init,
        bert_pretrained_model_name=args.bert_model_name,
        config=args)
    print(self.model)

    # Set the device and distributed settings
    self.device = torch.device(
        'cuda' if torch.cuda.is_available() else 'cpu')
    if args.distributed and torch.cuda.is_available() \
            and torch.cuda.device_count() > 1:
        self.model = torch.nn.DataParallel(self.model)
    self.model.to(self.device)

    # Define the data loaders
    self.train_loader, \
        self.val_loader, \
        self.test_loader = get_triplet_dataloaders(
            data_root=data_dir,
            batch_size=args.batch_size,
            dataset=args.dataset,
            num_workers=args.num_workers,
            pretrained_model_name=args.bert_model_name)

    # Define the optimizer. DataParallel hides submodules behind
    # .module, so unwrap before building the parameter groups;
    # accessing self.model.bert directly would raise AttributeError
    # when the model is wrapped.
    base_model = (self.model.module
                  if isinstance(self.model, torch.nn.DataParallel)
                  else self.model)
    if args.finetune_bert:
        # Train BERT jointly, with its own learning rate
        print('Finetuning BERT')
        self.optimizer = torch.optim.Adam(
            [{
                'params': base_model.bert.parameters(),
                'lr': args.learning_rate_bert
            }, {
                'params': base_model.acoustic_encoder.parameters()
            }, {
                'params': base_model.classifier.parameters()
            }],
            lr=args.learning_rate)
    else:
        # Keep BERT fixed and train only the acoustic branch
        print('Freezing BERT')
        self.optimizer = torch.optim.Adam(
            [{
                'params': base_model.acoustic_encoder.parameters()
            }, {
                'params': base_model.classifier.parameters()
            }],
            lr=args.learning_rate)

    # Parameters for the losses
    self.weight_text = args.weight_text
    self.weight_embedding = args.weight_embedding
    self.margin = args.margin

    super().__init__(args)
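# Usage sketch (illustrative, not from the source): this second variant
# additionally expects `data_path` and `finetune_bert`, and remaps
# 'slurp' onto the snips-style pipeline. `TripletTrainer` remains a
# hypothetical class name; all values are assumed placeholders.
#
#   from types import SimpleNamespace
#
#   args = SimpleNamespace(
#       data_path='data/slurp', dataset='slurp',   # 91 intent classes
#       finetune_bert=False,                       # prints 'Freezing BERT'
#       num_enc_layers=4, enc_dim=256,
#       bert_random_init=False,
#       bert_model_name='bert-base-uncased',
#       distributed=False, batch_size=32, num_workers=4,
#       learning_rate=1e-3, learning_rate_bert=1e-5,
#       weight_text=1.0, weight_embedding=1.0, margin=0.5)
#   trainer = TripletTrainer(args)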