def main():
    """Train the speaker model and evaluate it after every epoch.

    Reads configuration from the module-level ``args``/``c``/``kwargs`` globals,
    optionally resumes model+optimizer state from ``args.resume``, and runs
    ``train``/``test`` for ``args.epochs`` epochs starting at ``args.start_epoch``.
    """
    # Views the training images and displays the distance on
    # anchor-negative and anchor-positive pairs.
    test_display_triplet_distance = False

    # Print the experiment configuration.
    print('\nparsed options:\n{}\n'.format(vars(args)))
    print('\nNumber of Classes:\n{}\n'.format(len(train_dir.classes)))

    # Instantiate model and initialize weights.
    # TODO(xin): IMPORTANT load num_classes from checkpoint
    model = DeepSpeakerModel(embedding_size=args.embedding_size,
                             num_classes=len(train_dir.classes),
                             feature_dim=num_features,
                             frame_dim=c.NUM_FRAMES)

    if args.cuda:
        model.cuda()

    # Local import: torchsummary is only needed for this one diagnostic dump.
    from torchsummary import summary
    summary(model, (1, c.NUM_FRAMES, c.NUM_FEATURES))
    # # More detailed information on model
    # print(model)

    optimizer = create_optimizer(model, args.lr)

    # Optionally resume from a checkpoint (model weights + optimizer state).
    if args.resume:
        if os.path.isfile(args.resume):
            print('=> loading checkpoint {}'.format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
        else:
            print('=> no checkpoint found at {}'.format(args.resume))

    start = args.start_epoch
    end = start + args.epochs

    # NOTE(review): shuffle=False on the training loader is unusual for SGD —
    # confirm this is intentional (e.g. a sampler elsewhere handles ordering).
    train_loader = torch.utils.data.DataLoader(train_dir, batch_size=args.batch_size,
                                               shuffle=False, **kwargs)
    test_loader = torch.utils.data.DataLoader(test_dir, batch_size=args.test_batch_size,
                                              shuffle=False, **kwargs)

    # Bug fix: the test-only short-circuit used to live inside the epoch loop,
    # so it was silently skipped whenever start >= end. Evaluate once and exit.
    if args.test_only:
        test(test_loader, model, start)
        return

    for epoch in range(start, end):
        train(train_loader, model, optimizer, epoch)
        test(test_loader, model, epoch)
def main():
    """Build speaker embeddings from the enrollment set, then verify test utterances."""
    model = DeepSpeakerModel(embedding_size=256, num_classes=10)

    # Restore pretrained weights when a checkpoint path was supplied.
    if args.resume:
        if not os.path.isfile(args.resume):
            print('=> no checkpoint found at {}'.format(args.resume))
        else:
            print('=> loading checkpoint {}'.format(args.resume))
            state = torch.load(args.resume)
            model.load_state_dict(state['state_dict'])

    # Single-utterance sanity check, kept for reference:
    # print(calculateOneEmbedding('/home/zinzin/Documents/pytorch/deepspeaker-pytorch/data/test_set/dnl/s1/t1/s1_t1_1.wav', model))
    enrolled = enrollment(model)
    test(model, enrolled)
def main():
    """Train the speaker model, optionally resuming from a checkpoint.

    Uses the module-level ``args``/``kwargs``/``train_dir`` globals for
    configuration and data; runs ``train`` for ``args.epochs`` epochs.
    """
    # Views the training images and displays the distance on
    # anchor-negative and anchor-positive pairs.
    test_display_triplet_distance = False

    # Print the experiment configuration.
    print('\nparsed options:\n{}\n'.format(vars(args)))
    print('\nNumber of Classes:\n{}\n'.format(len(train_dir.classes)))

    # Instantiate model and initialize weights.
    model = DeepSpeakerModel(embedding_size=args.embedding_size,
                             num_classes=len(train_dir.classes))
    if args.cuda:
        model.cuda()

    optimizer = create_optimizer(model, args.lr)

    # Optionally resume from a checkpoint.
    if args.resume:
        if os.path.isfile(args.resume):
            print('=> loading checkpoint {}'.format(args.resume))
            # Bug fix: the checkpoint was previously deserialized from disk
            # twice in a row; load it once and reuse for model + optimizer.
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
        else:
            print('=> no checkpoint found at {}'.format(args.resume))

    start = args.start_epoch
    end = start + args.epochs

    # NOTE(review): shuffle=False on the training loader is unusual for SGD —
    # confirm this is intentional.
    train_loader = torch.utils.data.DataLoader(train_dir, batch_size=args.batch_size,
                                               shuffle=False, **kwargs)

    for epoch in range(start, end):
        train(train_loader, model, optimizer, epoch)
        if test_display_triplet_distance:
            display_triplet_distance(model, train_loader,
                                     LOG_DIR + "/train_{}".format(epoch))
# Keep only pretrained tensors whose name AND shape match the current model,
# so partially-compatible checkpoints can still be loaded.
pretrained_dict = {
    k: v for k, v in pretrained_dict.items()
    # Idiom fix: membership on the dict directly (no redundant .keys()).
    if k in model_dict and v.size() == model_dict[k].size()
}
# Model parameters that received no pretrained value (name/shape mismatch).
# Idiom fix: iterate keys only (values were unused) and use `k not in` (PEP 8).
missed_params = [k for k in model_dict if k not in pretrained_dict]
print('loaded params/tot params:{}/{}'.format(len(pretrained_dict), len(model_dict)))
print('miss matched params:{}'.format(missed_params))
# Overlay the surviving pretrained tensors onto the model's own state dict.
model_dict.update(pretrained_dict)
model.load_state_dict(model_dict)

train_dataset = Mydataset(opt.data_path, 'train')
train_loader = DataLoader(train_dataset,
                          batch_size=opt.batch_size,
                          num_workers=opt.num_workers,
                          drop_last=False,
                          shuffle=True)

# NOTE(review): shuffle=True on the *test* loader is unusual — confirm intended.
test_dataset = Mydataset(opt.data_path, 'test')
test_loader = DataLoader(test_dataset,
                         batch_size=opt.batch_size,
                         num_workers=opt.num_workers,
                         drop_last=False,
                         shuffle=True)
def load_model(ckp_path, embedding_size=512, num_classes=251):
    """Load a DeepSpeakerModel's weights from a checkpoint, mapped to CPU.

    Generalized: the model dimensions are now parameters (defaults preserve
    the previous hard-coded behavior, so existing callers are unaffected).

    Args:
        ckp_path: Path to a saved ``state_dict`` checkpoint file.
        embedding_size: Embedding dimension the checkpoint was trained with.
        num_classes: Number of speaker classes the checkpoint was trained with.

    Returns:
        The DeepSpeakerModel instance with restored weights (on CPU).
    """
    model = DeepSpeakerModel(embedding_size=embedding_size,
                             num_classes=num_classes)
    # map_location='cpu' allows loading GPU-trained checkpoints on CPU-only hosts.
    checkpoint = torch.load(ckp_path, map_location='cpu')
    model.load_state_dict(checkpoint)
    return model