def drop_classes(trainer, nClasses, num_drop=1000):
    ''' Randomly select classes to drop and rebuild the data loader without them '''

    assert num_drop < nClasses, "Can't drop all classes"

    classes_to_drop = list(np.random.choice(nClasses, size=num_drop, replace=False))

    ## Tell the trainer/loss to ignore the dropped classes
    trainer.set_ignored_classes(classes_to_drop)

    ## Rebuild the loader without the dropped classes; relies on the module-level `args` namespace
    dataset_loader = get_data_loader(args.train_list, classes_to_drop, **vars(args))

    return dataset_loader, trainer
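# The function above assumes the trainer's loss exposes a `set_ignored_classes`
# hook. A minimal sketch of what such a hook could look like (hypothetical
# illustration, not the actual implementation in this repository): logits of the
# ignored classes are masked to -inf so dropped classes get zero softmax mass
# and never contribute to the loss. Samples of dropped classes are assumed to be
# excluded from the loader, so no label ever points at a masked class.
import torch
import torch.nn.functional as F

class MaskedSoftmaxLoss(torch.nn.Module):
    def __init__(self, nOut, nClasses):
        super().__init__()
        self.fc = torch.nn.Linear(nOut, nClasses)
        self.ignored = []

    def set_ignored_classes(self, classes_to_drop):
        # Remember which class indices should be excluded from training
        self.ignored = list(classes_to_drop)

    def forward(self, x, label):
        logits = self.fc(x)
        if self.ignored:
            mask = torch.zeros(logits.size(1), dtype=torch.bool, device=logits.device)
            mask[self.ignored] = True
            # Ignored class logits get -inf so they receive zero softmax mass
            logits = logits.masked_fill(mask, float('-inf'))
        return F.cross_entropy(logits, label)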
strtime = datetime.datetime.now().strftime("%Y%m%d%H%M%S")

zipf = zipfile.ZipFile(result_save_path + '/run%s.zip' % strtime, 'w', zipfile.ZIP_DEFLATED)
for file in pyfiles:
    zipf.write(file)
zipf.close()

f = open(result_save_path + '/run%s.cmd' % strtime, 'w')
f.write(' '.join(sys.argv))
f.close()

## Write args to scorefile
scorefile = open(result_save_path + "/scores.txt", "a+")

## Initialise data loader
trainLoader = get_data_loader(args.train_list, **vars(args))

clr = s.updateLearningRate(1)

while True:

    print(time.strftime("%Y-%m-%d %H:%M:%S"), it, "Training %s with LR %f..." % (args.model, max(clr)))

    ## Train network
    loss, traineer = s.train_network(loader=trainLoader)

    ## Validate and save
    if it % args.test_interval == 0:

        print(time.strftime("%Y-%m-%d %H:%M:%S"), it, "Evaluating...")

        sc, lab, _ = s.evaluateFromList(args.test_list, print_interval=100, test_path=args.test_path, eval_frames=args.eval_frames)
def main_worker(gpu, ngpus_per_node, args):

    args.gpu = gpu

    ## Load models
    s = SpeakerNet(**vars(args))

    if args.distributed:
        os.environ['MASTER_ADDR'] = 'localhost'
        os.environ['MASTER_PORT'] = args.port

        dist.init_process_group(backend='nccl', world_size=ngpus_per_node, rank=args.gpu)

        torch.cuda.set_device(args.gpu)
        s.cuda(args.gpu)

        s = torch.nn.parallel.DistributedDataParallel(s, device_ids=[args.gpu], find_unused_parameters=True)

        print('Loaded the model on GPU %d' % args.gpu)

    else:
        s = WrappedModel(s).cuda(args.gpu)

    it = 1

    ## Write args to scorefile
    scorefile = open(args.result_save_path + "/scores.txt", "a+")

    for items in vars(args):
        scorefile.write('%s %s\n' % (items, vars(args)[items]))
    scorefile.flush()

    ## Initialise trainer
    trainer = ModelTrainer(s, **vars(args))

    ## Load model weights
    modelfiles = glob.glob('%s/model0*.model' % args.model_save_path)
    modelfiles.sort()

    if len(modelfiles) >= 1:
        trainer.loadParameters(modelfiles[-1])
        print("Model %s loaded from previous state!" % modelfiles[-1])
        it = int(os.path.splitext(os.path.basename(modelfiles[-1]))[0][5:]) + 1
    elif args.initial_model != "":
        trainer.loadParameters(args.initial_model)
        print("Model %s loaded!" % args.initial_model)

    ## Advance the scheduler to the current iteration
    for ii in range(1, it):
        trainer.__scheduler__.step()

    ## Evaluation code - must run on single GPU
    if args.eval:

        pytorch_total_params = sum(p.numel() for p in s.module.__S__.parameters())

        print('Total parameters: ', pytorch_total_params)
        print('Test list', args.test_list)

        assert not args.distributed, "Evaluation must be run on a single GPU"

        sc, lab, _ = trainer.evaluateFromList(**vars(args))
        result = tuneThresholdfromScore(sc, lab, [1, 0.1])

        scfile = open(args.result_save_path + "/answer.txt", "a+")
        for score in sc:
            scfile.write('%f\n' % score)
        scfile.flush()
        scfile.close()

        p_target = 0.05
        c_miss = 1
        c_fa = 1

        fnrs, fprs, thresholds = ComputeErrorRates(sc, lab)
        mindcf, threshold = ComputeMinDcf(fnrs, fprs, thresholds, p_target, c_miss, c_fa)

        print('EER %2.4f MinDCF %.5f' % (result[1], mindcf))

        quit()

    ## Initialise data loader
    trainLoader = get_data_loader(args.train_list, None, **vars(args))

    ## Save training code and params
    if args.gpu == 0:

        pyfiles = glob.glob('./*.py')
        strtime = datetime.datetime.now().strftime("%Y%m%d%H%M%S")

        zipf = zipfile.ZipFile(args.result_save_path + '/run%s.zip' % strtime, 'w', zipfile.ZIP_DEFLATED)
        for file in pyfiles:
            zipf.write(file)
        zipf.close()

        with open(args.result_save_path + '/run%s.cmd' % strtime, 'w') as f:
            f.write('%s' % args)

    ## Core training script
    for it in range(it, args.max_epoch + 1):

        clr = [x['lr'] for x in trainer.__optimizer__.param_groups]

        print(time.strftime("%Y-%m-%d %H:%M:%S"), it, "Training epoch %d on GPU %d with LR %f " % (it, args.gpu, max(clr)))

        # --------
        ## Periodically drop a random subset of classes and rebuild the loader
        if args.use_dropclass:
            if it % args.its_per_drop == 0 or it == 1:
                trainLoader, trainer = drop_classes(trainer, args.nClasses, num_drop=args.num_drop)
        # --------

        loss, traineer = trainer.train_network(trainLoader, verbose=(args.gpu == 0))

        if it % args.test_interval == 0 and args.gpu == 0:

            ## Perform evaluation only in single GPU training
            if not args.distributed:
                sc, lab, _ = trainer.evaluateFromList(**vars(args))
                result = tuneThresholdfromScore(sc, lab, [1, 0.1])

                print("IT %d, VEER %2.4f" % (it, result[1]))
                scorefile.write("\nIT %d, VEER %2.4f\n" % (it, result[1]))

            trainer.saveParameters(args.model_save_path + "/model%09d.model" % it)

        print(time.strftime("%Y-%m-%d %H:%M:%S"), "TEER/TAcc %2.2f, TLOSS %f" % (traineer, loss))
        scorefile.write("IT %d, TEER/TAcc %2.2f, TLOSS %f\n" % (it, traineer, loss))
    scorefile.flush()
    scorefile.close()
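# main_worker above expects to be launched once per GPU. A minimal entry-point
# sketch (assuming an argparse `parser` defined elsewhere and an `args` namespace
# with `distributed` and `port` fields; this is an illustration, not necessarily
# the exact launcher used in this repository):
import torch
import torch.multiprocessing as mp

if __name__ == '__main__':
    args = parser.parse_args()          # hypothetical parser defined elsewhere
    n_gpus = torch.cuda.device_count()

    if args.distributed:
        # One process per GPU; each process receives its GPU index as `gpu`
        mp.spawn(main_worker, nprocs=n_gpus, args=(n_gpus, args))
    else:
        main_worker(0, None, args)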
def main_worker(args):

    ## Load models
    s = EmbedNet(**vars(args)).cuda()

    it = 1

    ## Write args to scorefile
    scorefile = open(args.result_save_path + "/scores.txt", "a+")

    strtime = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    scorefile.write('%s\n%s\n' % (strtime, args))
    scorefile.flush()

    ## Input transformations for training
    train_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize(256),
        transforms.RandomCrop([224, 224]),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    ## Input transformations for evaluation
    test_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize(256),
        transforms.CenterCrop([224, 224]),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    ## Initialise trainer and data loader
    trainLoader = get_data_loader(transform=train_transform, **vars(args))
    trainer = ModelTrainer(s, **vars(args))

    ## Load model weights
    modelfiles = glob.glob('%s/model0*.model' % args.model_save_path)
    modelfiles.sort()

    ## If the target directory already exists, start from the existing file
    if len(modelfiles) >= 1:
        trainer.loadParameters(modelfiles[-1])
        print("Model %s loaded from previous state!" % modelfiles[-1])
        it = int(os.path.splitext(os.path.basename(modelfiles[-1]))[0][5:]) + 1
    elif args.initial_model != "":
        trainer.loadParameters(args.initial_model)
        print("Model %s loaded!" % args.initial_model)

    ## If the current iteration is not 1, update the scheduler
    for ii in range(1, it):
        trainer.__scheduler__.step()

    ## Evaluation code
    if args.eval:
        sc, lab = trainer.evaluateFromList(transform=test_transform, **vars(args))
        result = tuneThresholdfromScore(sc, lab, [1, 0.1])

        print('EER %2.4f' % result[1])

        quit()

    ## Core training script
    for it in range(it, args.max_epoch + 1):

        clr = [x['lr'] for x in trainer.__optimizer__.param_groups]

        print(time.strftime("%Y-%m-%d %H:%M:%S"), it, "Training epoch %d with LR %f " % (it, max(clr)))

        loss, traineer = trainer.train_network(trainLoader, verbose=True)

        if it % args.test_interval == 0:

            sc, lab = trainer.evaluateFromList(transform=test_transform, **vars(args))
            result = tuneThresholdfromScore(sc, lab, [1, 0.1])

            print("IT %d, VEER %2.4f" % (it, result[1]))
            scorefile.write("IT %d, VEER %2.4f\n" % (it, result[1]))

            trainer.saveParameters(args.model_save_path + "/model%09d.model" % it)

        print(time.strftime("%Y-%m-%d %H:%M:%S"), "TEER/TAcc %2.2f, TLOSS %f" % (traineer, loss))
        scorefile.write("IT %d, TEER/TAcc %2.2f, TLOSS %f\n" % (it, traineer, loss))

        scorefile.flush()

    scorefile.close()
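# The single-GPU EmbedNet trainer above is typically invoked directly from the
# entry point. A minimal sketch (hypothetical; assumes an argparse `parser`
# defined elsewhere and the `model_save_path` / `result_save_path` fields used above):
import os

if __name__ == '__main__':
    args = parser.parse_args()          # hypothetical parser defined elsewhere

    # Make sure the output directories exist before training starts
    os.makedirs(args.model_save_path, exist_ok=True)
    os.makedirs(args.result_save_path, exist_ok=True)

    main_worker(args)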