def main(config):
    """Train the Multi_modal model and checkpoint the best validation accuracy.

    Args:
        config: dict with keys 'use_gpu', 'input_dim', 'vocab_len',
            'embedding_dim', 'hidden_dim_AE', 'hidden_dim_PE',
            'n_warmup_steps', 'batch_size', 'num_epochs'.

    NOTE(review): this function reads a module-level ``args`` namespace
    (``training_filepath``, ``testing_filepath``, ``save_modelpath``) —
    confirm it is defined by the argparse section of this file.
    """
    use_cuda = config['use_gpu']
    device = torch.device("cuda" if use_cuda == 1 else "cpu")

    model = Multi_modal(config['input_dim'], config['vocab_len'],
                        config['embedding_dim'], config['hidden_dim_AE'],
                        config['hidden_dim_PE'])
    model = model.to(device)

    # Adam wrapped in a warmup scheduler; only trainable params are optimized.
    optimizer = ScheduledOptim(
        optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                   betas=(0.9, 0.98), eps=1e-09, weight_decay=1e-4,
                   amsgrad=True),
        config['n_warmup_steps'])
    criterion = nn.CrossEntropyLoss()

    ### Data related
    dataset_train = SpeechDataGenerator(manifest=args.training_filepath)
    dataloader_train = DataLoader(dataset_train,
                                  batch_size=config['batch_size'],
                                  shuffle=True, collate_fn=speech_collate)
    dataset_test = SpeechDataGenerator(manifest=args.testing_filepath)
    dataloader_test = DataLoader(dataset_test,
                                 batch_size=config['batch_size'],
                                 shuffle=True, collate_fn=speech_collate)

    if not os.path.exists(args.save_modelpath):
        os.makedirs(args.save_modelpath)

    # FIX: removed unused `best_loss = np.inf` (never read or updated).
    best_acc = 0
    best_epoch = -1
    for epoch in range(1, config['num_epochs'] + 1):
        train_loss, train_acc = train(model, dataloader_train, epoch,
                                      optimizer, device, criterion)
        val_loss, val_acc = evaluation(model, dataloader_test, epoch,
                                       device, criterion)
        # Checkpoint whenever validation accuracy improves.
        if val_acc > best_acc:
            # FIX: was `max(val_acc, best_acc)` — redundant inside this
            # branch, where val_acc > best_acc already holds.
            best_acc = val_acc
            model_save_path = os.path.join(
                args.save_modelpath, 'best_check_point_' + str(epoch))
            state_dict = {
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'epoch': epoch
            }
            torch.save(state_dict, model_save_path)
            print('best accuracy so far {}'.format(best_acc))
            best_epoch = epoch + 1
        elif epoch - best_epoch > 2:
            # No improvement for >2 epochs: widen the scheduler's delta
            # and reset the patience window.
            optimizer.increase_delta()
            best_epoch = epoch + 1
        print('best accuracy so far {}'.format(best_acc))
def main(config, args):
    """Train the Siamese model on paired clean/noisy speech files.

    Args:
        config: dict with keys 'use_gpu' and 'num_epochs'.
        args: namespace providing ``clean_file`` and ``noisy_file`` paths.
    """
    gpu_requested = config['use_gpu']
    device = torch.device("cuda" if gpu_requested == 1 else "cpu")

    net = Siamese().to(device)

    # Two training criteria: L1 reconstruction plus cosine similarity.
    reconstruction_criterion = nn.L1Loss()
    similarity_criterion = nn.CosineSimilarity(dim=1)
    opt = optim.Adam(net.parameters(), lr=0.001, weight_decay=0.0,
                     betas=(0.9, 0.98), eps=1e-9)

    # The generator batches internally (batch_s=100), so the DataLoader
    # itself uses batch_size=1.
    train_set = SpeechDataGenerator(args.clean_file, args.noisy_file,
                                    batch_s=100)
    train_loader = DataLoader(train_set, batch_size=1, shuffle=True,
                              collate_fn=speech_collate)

    for epoch in range(1, config['num_epochs'] + 1):
        epoch_loss = train(net, train_loader, epoch, opt, device,
                           reconstruction_criterion, similarity_criterion)
def main(args):
    """Train and validate a RawNet speaker model.

    Args:
        args: namespace with ``training_filepath``, ``validation_filepath``,
            ``batch_size``, ``input_dim``, ``num_classes`` and ``num_epochs``.
    """
    ### Data related
    train_set = SpeechDataGenerator(manifest=args.training_filepath,
                                    mode='train')
    train_loader = DataLoader(train_set, batch_size=args.batch_size,
                              shuffle=True, collate_fn=speech_collate)

    val_set = SpeechDataGenerator(manifest=args.validation_filepath,
                                  mode='train')
    val_loader = DataLoader(val_set, batch_size=args.batch_size,
                            shuffle=True, collate_fn=speech_collate)

    # dataset_test = SpeechDataGenerator(manifest=args.testing_filepath, mode='test')
    # dataloader_test = DataLoader(dataset_test, batch_size=args.batch_size, shuffle=True, collate_fn=speech_collate)

    ## Model related
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    net = RawNet(args.input_dim, args.num_classes).to(device)
    opt = optim.Adam(net.parameters(), lr=0.001, weight_decay=0.0,
                     betas=(0.9, 0.98), eps=1e-9)

    # Combined objective: cross-entropy plus GE2E embedding loss.
    classification_loss = nn.CrossEntropyLoss()
    embedding_loss = GE2ELoss(device)

    for epoch in range(args.num_epochs):
        train(net, train_loader, epoch, classification_loss,
              embedding_loss, opt, device)
        validation(net, val_loader, epoch, classification_loss,
                   embedding_loss, opt, device)
default='meta/training_s1_s2_s3_s4.txt') parser.add_argument('-testing_filepath', type=str, default='meta/testing_s5.txt') parser.add_argument('-input_dim', action="store_true", default=1) parser.add_argument('-num_classes', action="store_true", default=4) parser.add_argument('-lamda_val', action="store_true", default=0.1) parser.add_argument('-batch_size', action="store_true", default=64) parser.add_argument('-use_gpu', action="store_true", default=True) parser.add_argument('-num_epochs', action="store_true", default=100) args = parser.parse_args() ### Data related dataset_test = SpeechDataGenerator(manifest=args.testing_filepath, mode='test') dataloader_test = DataLoader(dataset_test, batch_size=args.batch_size, shuffle=True, collate_fn=speech_collate) ## Model related use_cuda = torch.cuda.is_available() device = torch.device("cuda" if use_cuda else "cpu") model = Emo_Raw_TDNN(args.input_dim, args.num_classes).to(device) optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=0.0, betas=(0.9, 0.98), eps=1e-9)
data_path_train = 'meta/training.txt'
data_path_test = 'meta/testing.txt'

#### Params
def speech_collate(batch):
    """Collate dataset samples into parallel (specs, targets) lists.

    Args:
        batch: iterable of dicts with keys 'spec' and 'labels'.

    Returns:
        (specs, targets): two lists in the same sample order.
    """
    targets = []
    specs = []
    for sample in batch:
        specs.append(sample['spec'])
        targets.append((sample['labels']))
    return specs, targets

### Data related
dataset_train = SpeechDataGenerator(manifest=data_path_train, mode='train')
dataloader_train = DataLoader(dataset_train, batch_size=32, shuffle=True,
                              collate_fn=speech_collate)
# No shuffle for the test loader: evaluation order does not matter.
dataset_test = SpeechDataGenerator(manifest=data_path_test, mode='test')
dataloader_test = DataLoader(dataset_test, batch_size=32,
                             collate_fn=speech_collate)

## Model related
use_cuda = torch.cuda.is_available()
# BUG FIX: `use_cuda` was computed but ignored and the device was
# hard-coded to "cuda:0", crashing on CPU-only machines. Still pins to
# GPU 0 when CUDA is available.
device = torch.device("cuda:0" if use_cuda else "cpu")
model = AttnPooling(num_classes=1).to(device)
# model.load_state_dict(torch.load('model_checkpoints/check_point_old')['model'])
data_path_train = 'meta/training.txt'
data_path_test = 'meta/testing.txt'

#### Params
def speech_collate(batch):
    """Split a batch of samples into raw-waveform and label lists.

    Args:
        batch: iterable of dicts with keys 'audio_wav' and 'labels'.

    Returns:
        (specs, targets): waveforms and labels in the same sample order.
    """
    specs = [item['audio_wav'] for item in batch]
    targets = [item['labels'] for item in batch]
    return specs, targets

### Data related
dataset_train = SpeechDataGenerator(manifest=data_path_train)
dataloader_train = DataLoader(dataset_train,
                              batch_size=150,
                              num_workers=12,
                              shuffle=True,
                              collate_fn=speech_collate)

dataset_test = SpeechDataGenerator(manifest=data_path_test)
dataloader_test = DataLoader(dataset_test,
                             batch_size=150,
                             num_workers=12,
                             shuffle=True,
                             collate_fn=speech_collate)

## Model related
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
parser.add_argument('-training_filepath', type=str, default='meta/training.txt') parser.add_argument('-testing_filepath', type=str, default='meta/testing.txt') parser.add_argument('-input_feat_dim', action="store_true", default=39) parser.add_argument('-num_phones', action="store_true", default=86) parser.add_argument('-num_heads', action="store_true", default=13) parser.add_argument('-num_layers', action="store_true", default=12) parser.add_argument('-lamda_val', action="store_true", default=0.1) parser.add_argument('-batch_size', action="store_true", default=32) parser.add_argument('-use_gpu', action="store_true", default=True) parser.add_argument('-num_epochs', action="store_true", default=100) args = parser.parse_args() ### Data related dataset_train = SpeechDataGenerator(manifest=args.training_filepath) dataloader_train = DataLoader(dataset_train, batch_size=args.batch_size, shuffle=True, collate_fn=speech_collate) dataset_test = SpeechDataGenerator(manifest=args.testing_filepath) dataloader_test = DataLoader(dataset_test, batch_size=args.batch_size, shuffle=True, collate_fn=speech_collate) ## Model related use_cuda = torch.cuda.is_available() device = torch.device("cuda" if use_cuda else "cpu") model = BERTphone(args.input_feat_dim, args.num_phones, args.num_heads,
default='meta/training.txt') parser.add_argument('-testing_filepath', type=str, default='meta/testing.txt') parser.add_argument('-validation_filepath', type=str, default='meta/validation.txt') parser.add_argument('-input_dim', action="store_true", default=40) parser.add_argument('-num_classes', action="store_true", default=8) parser.add_argument('-lamda_val', action="store_true", default=0.1) parser.add_argument('-batch_size', action="store_true", default=128) parser.add_argument('-use_gpu', action="store_true", default=True) parser.add_argument('-num_epochs', action="store_true", default=100) args = parser.parse_args() ### Data related dataset_train = SpeechDataGenerator(manifest=args.training_filepath, mode='train') dataloader_train = DataLoader(dataset_train, batch_size=args.batch_size, shuffle=True, collate_fn=speech_collate) dataset_val = SpeechDataGenerator(manifest=args.validation_filepath, mode='train') dataloader_val = DataLoader(dataset_train, batch_size=args.batch_size, shuffle=True, collate_fn=speech_collate) dataset_test = SpeechDataGenerator(manifest=args.testing_filepath, mode='test') dataloader_test = DataLoader(dataset_test, batch_size=args.batch_size,