async def local_training(config, train_loader, test_loader):
    """Run one local training round and exchange the config with the master.

    Builds a ``MyNet`` from ``config.model``, loads ``config.para`` into it,
    trains it with SGD (lr=0.1) for ``config.epoch_num`` epochs via ``train``
    and stores the outcome back on ``config`` (``acc``, ``model``, ``para``).
    The updated config is sent to the master, then the config received in
    reply is merged attribute-by-attribute into ``config`` (in place).

    Relies on the module-level names ``device`` and ``args``.

    Args:
        config: Mutable configuration object; updated in place.
        train_loader: Loader handed to ``train`` for the training data.
        test_loader: Loader handed to ``train`` for the test data.
    """
    net = MyNet(config.model)
    net.load_state_dict(config.para)
    net = net.to(device)
    sgd = optim.SGD(net.parameters(), lr=0.1)

    # ``train`` returns the test accuracy, which travels with the config.
    config.acc = train(
        args,
        config,
        net,
        device,
        train_loader,
        test_loader,
        sgd,
        config.epoch_num,
    )
    config.model = models.Net2Tuple(net)
    config.para = dict(net.named_parameters())

    print("before send")
    await send_data(config, MASTER_IP, MASTER_LISTEN_PORT)
    print("after send")

    # Overwrite local attributes with whatever the peer sent back.
    reply = await get_data(LISTEN_PORT, LOCAL_IP)
    for attr_name, attr_value in vars(reply).items():
        setattr(config, attr_name, attr_value)
def main(args):
    """Fine-tune and/or evaluate a BERT model for the task in ``args.task``.

    Steps: load the tokenizer, load and re-tokenize the train/test data,
    carve a validation split off the front of the training data, build the
    label set and cached features, load the pretrained model, then
    optionally train (``args.do_train``) and evaluate on the test split
    (``args.do_predict``). Test metrics are written to
    ``performance_on_test_set.txt`` inside ``args.output_dir``.

    Args:
        args: Parsed arguments. Attributes read here: ``embedding``,
            ``do_lower_case``, ``task``, ``validation_ratio``, ``device``,
            ``do_train``, ``do_predict`` and ``output_dir``.

    Raises:
        NotImplementedError: If ``args.task`` is neither ``'classification'``
            nor ``'sequence_labelling'``.
    """
    pretrained_path = os.path.join('pretrained-models', args.embedding)

    # --------------------------------- DATA ---------------------------------

    # Tokenizer (library INFO logs are silenced while loading; the finally
    # clause guarantees logging is re-enabled even if loading fails).
    logging.disable(logging.INFO)
    try:
        tokenizer = BertTokenizer.from_pretrained(
            pretrained_path, do_lower_case=args.do_lower_case)
    finally:
        logging.disable(logging.NOTSET)
    tokenization_function = tokenizer.tokenize

    # Resolve the task-specific dataset loader and model class once.
    if args.task == 'classification':
        load_dataset = load_classification_dataset
        model_class = BertForSequenceClassification
    elif args.task == 'sequence_labelling':
        load_dataset = load_sequence_labelling_dataset
        model_class = BertForTokenClassification
    else:
        raise NotImplementedError

    # Pre-processsing: apply basic tokenization (both) then split into wordpieces (BERT only)
    data = {}
    for split in ['train', 'test']:
        data[split] = load_dataset(step=split, do_lower_case=args.do_lower_case)
        retokenize(data[split], tokenization_function)

    logging.info('Splitting training data into train / validation sets...')
    # The first `validation_ratio` share of the training data becomes validation.
    n_validation = int(args.validation_ratio * len(data['train']))
    data['validation'] = data['train'][:n_validation]
    data['train'] = data['train'][n_validation:]
    logging.info('New number of training sequences: %d', len(data['train']))
    logging.info('New number of validation sequences: %d', len(data['validation']))

    # Count target labels or classes
    all_examples = data['train'] + data['validation'] + data['test']
    if args.task == 'classification':
        counter_all = Counter(example.label for example in all_examples)
        counter = Counter(example.label for example in data['train'])
        token_lengths = [
            len(e.tokens_a if e.tokens_b is None else e.tokens_a + e.tokens_b)
            for e in all_examples
        ]
    else:  # 'sequence_labelling' (any other task was rejected above)
        counter_all = Counter(
            label for example in all_examples for label in example.label_sequence)
        counter = Counter(
            label for example in data['train'] for label in example.label_sequence)
        token_lengths = [len(e.token_sequence) for e in all_examples]

    # Maximum sequence length is either 512 or maximum token sequence length + 5
    max_seq_length = min(512, 5 + max(token_lengths))

    labels = sorted(counter_all.keys())
    num_labels = len(labels)
    logging.info("Goal: predict the following labels")
    for i, label in enumerate(labels):
        logging.info("* %s: %s (count: %s)", label, i, counter[label])

    # Input features: list[token indices]
    pad_token_id = tokenizer.convert_tokens_to_ids([tokenizer.pad_token])[0]
    pad_token_label_id = None
    if args.task == 'sequence_labelling':
        pad_token_label_id = CrossEntropyLoss().ignore_index
    dataset = {}
    logging.info("Maximum sequence lenght: %s", max_seq_length)
    for split in data:
        dataset[split] = build_and_cache_features(
            args,
            split=split,
            tokenizer=tokenizer,
            examples=data[split],
            labels=labels,
            pad_token_id=pad_token_id,
            pad_token_label_id=pad_token_label_id,
            max_seq_length=max_seq_length)
    del data, all_examples  # Not used anymore

    # --------------------------------- MODEL ---------------------------------

    logging.info('Loading `%s` model...', args.embedding)
    logging.disable(logging.INFO)
    try:
        config = BertConfig.from_pretrained(pretrained_path, num_labels=num_labels)
        model = model_class.from_pretrained(pretrained_path, config=config)
    finally:
        logging.disable(logging.NOTSET)
    model.to(args.device)
    logging.info('Model:\n%s', model)

    # ------------------------------ TRAIN / EVAL ------------------------------

    # Log args
    logging.info('Using the following arguments for training:')
    for k, v in vars(args).items():
        logging.info("* %s: %s", k, v)

    # Bound up-front so that a predict-only run (do_predict without do_train)
    # does not hit a NameError when writing the performance file below.
    best_val_metric = None
    best_val_epoch = None

    # Training
    if args.do_train:
        global_step, train_loss, best_val_metric, best_val_epoch = train(
            args=args,
            dataset=dataset,
            model=model,
            tokenizer=tokenizer,
            labels=labels,
            pad_token_label_id=pad_token_label_id)
        logging.info("global_step = %s, average training loss = %s",
                     global_step, train_loss)
        logging.info("Best performance: Epoch=%d, Value=%s",
                     best_val_epoch, best_val_metric)

    # Evaluation on test data
    if args.do_predict:
        # Load best model
        logging.disable(logging.INFO)
        try:
            model = model_class.from_pretrained(args.output_dir)
        finally:
            logging.disable(logging.NOTSET)
        model.to(args.device)

        # Compute predictions and metrics
        results, _ = evaluate(
            args=args,
            eval_dataset=dataset["test"],
            model=model,
            labels=labels,
            pad_token_label_id=pad_token_label_id)

        # Save metrics (validation values are `None` when no training ran).
        with open(os.path.join(args.output_dir, 'performance_on_test_set.txt'), 'w') as f:
            f.write(f'best validation score: {best_val_metric}\n')
            f.write(f'best validation epoch: {best_val_epoch}\n')
            f.write('--- Performance on test set ---\n')
            for k, v in results.items():
                f.write(f'{k}: {v}\n')
loaders = [(train_loader_poisson, 'train_loader_poisson'), (train_loader_noise, 'train_loader_noise'), (train_loader_clean, 'train_loader_clean')] n_epochs = 40 for train_loader, loader_name in loaders: logger = SummaryWriter(f'runs/noise2noise_{loader_name}') print( f"\n\nTraining noise2noise for {n_epochs} epochs with loader {loader_name}" ) for epoch in tqdm.tqdm(range(n_epochs), total=n_epochs): # train train(net, train_loader, optimizer, LOSS_CRITERION, epoch, log_interval=25, tb_logger=logger, device=device) step = epoch * len(train_loader.dataset) # validate validate(net, val_loader, LOSS_CRITERION, EVAL_METRIC, step=step, tb_logger=logger, device=device) """## Exercises 1. Train a separete denoising model using clean target and compare the PSNR scores with those obtained with noise2noise model. Compare results of the two models visually in tensorboard.
print(datetime.now(), len(train_id_type_list), len(val_id_type_list)) assert len(to_set(train_id_type_list) & to_set(val_id_type_list)) == 0, "WTF" cnn = params['network'](lr=params['lr_kwargs']['lr'], **params, **params['network_kwargs']) params['save_prefix'] = params['save_prefix_template'].format( cnn_name=cnn.name, fold_index=val_fold_index - 1) print("\n {} - Loaded {} model ...".format(datetime.now(), cnn.name)) if 'pretrained_model' in params: load_pretrained_model(cnn, **params) print("\n {} - Start training ...".format(datetime.now())) h = train(cnn, train_id_type_list, val_id_type_list, **params) if h is None: continue hists.append(h) # ### Validation all classes n_runs = 2 n_folds = 5 run_counter = 0 cv_mean_scores = np.zeros((n_runs, n_folds)) val_fold_indices = [] # !!! CHECK BEFORE LOAD TO FLOYD params['pretrained_model'] = 'load_best' _trainval_id_type_list = np.array(trainval_id_type_list)
# Metric handed to `validate`: the dice coefficient.
metric = DiceCoefficient()

# Bookkeeping initialised before the loop; none of it is updated inside the
# loop shown here.
start = int(time.time())
stop = 0
best_accuracy = 0.
checkpoint_name = './best_checkpoint_{name}_{loss_name}.tar'.format(
    name=name, loss_name=loss_name)
best_epoch = 0

# Train for `n_epochs` epochs, validating after each one.
for epoch in tqdm.tqdm(range(n_epochs), total=n_epochs):
    # train
    train(
        net,
        train_loader,
        optimizer,
        loss_function,
        epoch,
        tb_logger=logger,
        device=device,
    )

    # Validation is logged at a global step counted in training samples.
    step = epoch * len(train_loader.dataset)

    # validate; only the second returned value is kept
    _, acc = validate(
        net,
        val_loader,
        loss_function,
        metric,
        step=step,
        tb_logger=logger,
        device=device,
        optimizer=optimizer,
    )
def main():
    """Train a classifier on CIFAR-10 and checkpoint it after every epoch.

    Parses the command line, creates a TensorBoard ``SummaryWriter`` (stored
    on ``args.writer``), builds the model selected by ``args.arch``, then
    alternates training and validation for ``args.epochs`` epochs. After each
    epoch the plateau scheduler is stepped with the validation loss and a
    checkpoint is saved. The module-level ``best_metrics`` is updated in
    place through ``test_best_metrics``.

    The writer is closed even if training raises.
    """
    global best_metrics

    # Parse the arguments
    args = parser.parse_args()

    # Create the SummaryWriter for Tensorboard
    args.writer = SummaryWriter('./logs/tensorboard/{}'.format(args.run_id))

    try:
        # Set the RNG seeds
        seeded = args.seed is not None
        if seeded:
            random.seed(args.seed)
            torch.manual_seed(args.seed)
            cudnn.deterministic = True
            # Implicit concatenation keeps source indentation out of the message.
            warnings.warn('You have chosen to seed training. '
                          'This will turn on the CUDNN deterministic setting, '
                          'which can slow down your training considerably! '
                          'You may see unexpected behavior when restarting '
                          'from checkpoints.')

        # Print out the training setup
        print('New training run...\n')
        print('   Run ID:            {}'.format(args.run_id))
        print('   Architecture:      {}'.format(args.arch))
        print('   Batch size:        {}'.format(args.batch_size))
        print('   Learning rate:     {}'.format(args.learning_rate))
        print('   Decay rate:        {}\n'.format(args.decay_rate))

        # Create the model
        print("=> creating model...")
        device = torch.device('cuda')
        model = models.__dict__[args.arch](
            pretrained=False, num_classes=args.classes).to(device)
        if args.arch.startswith(('alexnet', 'vgg')):
            # For these architectures only the feature extractor is parallelised.
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            model = torch.nn.DataParallel(model).cuda()

        criterion = nn.CrossEntropyLoss().to(device)
        optimizer = torch.optim.Adam(model.parameters(), args.learning_rate)
        lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, mode='min', factor=args.decay_rate, patience=10)

        # The cuDNN autotuner may pick different algorithms from run to run,
        # so it is only enabled when no seed (i.e. no determinism) was requested.
        cudnn.benchmark = not seeded

        # Create the datasets and loaders
        print('=> creating the datasets and iterators')

        # Per-channel mean / std passed to Normalize for both splits.
        normalize = transforms.Normalize((0.4914, 0.4822, 0.4465),
                                         (0.247, 0.243, 0.261))

        # Create the training dataset and loader
        training_transform = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])
        training_dataset = datasets.CIFAR10(
            './data', train=True, download=True, transform=training_transform)
        training_loader = torch.utils.data.DataLoader(
            training_dataset, batch_size=args.batch_size, shuffle=True)

        # Create the validation dataset and loader
        validation_transform = transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ])
        validation_dataset = datasets.CIFAR10(
            './data', train=False, transform=validation_transform)
        validation_loader = torch.utils.data.DataLoader(
            validation_dataset, batch_size=args.batch_size, shuffle=True)

        # Save the lengths of the data loaders for Tensorboard
        args.train_loader_len = len(training_loader)
        args.validation_loader_len = len(validation_loader)

        # Train the model
        print('=> starting the training\n')
        for epoch in range(args.epochs):
            # Set the current epoch to be used by Tensorboard
            args.current_epoch = epoch

            # Take a training step
            train(training_loader, model, criterion, optimizer, epoch, device, args)

            # Evaluate on validation set and check if it is the current best
            val_loss, metrics = validate(validation_loader, model, criterion,
                                         device, args)
            best_metrics, is_best = test_best_metrics(metrics, best_metrics)

            # Take a step using the learning rate scheduler
            lr_scheduler.step(val_loss)

            save_checkpoint(
                {
                    'epoch': epoch,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'best_acc': best_metrics[0],
                    'best_pre': best_metrics[1],
                    'best_rec': best_metrics[2]
                }, is_best, args)
    finally:
        # Close the Tensorboard writer
        args.writer.close()
# All-zero placeholder timestep (x of length 20, y of length 5) passed to
# `bucketing` as `placeholder_timestep`.
placeholder_x = [0] * 20
placeholder_y = [0] * 5

train_episodes = training_utils.bucketing(
    bucket_size=mini_batch_size,
    episodes=train_episodes,
    placeholder_timestep=[placeholder_x, placeholder_y])
#test_episodes = training_utils.bucketing(bucket_size=mini_batch_size, episodes=test_episodes, placeholder_timestep=[placeholder_x, placeholder_y])
validation_episodes = training_utils.bucketing(
    bucket_size=mini_batch_size,
    episodes=validation_episodes,
    placeholder_timestep=[placeholder_x, placeholder_y])

training_utils.train(train_data_xy=train_episodes,
                     validation_data_xy=validation_episodes,
                     model=model,
                     batch_size=mini_batch_size,
                     G=G)

# Reload the trained model with batch size 1 before the evaluation runs.
model = reload_model(model, batch_size=1)

# Count how many of 5 evaluation runs returned both durations.
success = 0
for _ in range(5):
    ml_dur, gt_dur = run_optimality_evaluation(G)
    # Identity test: `!= None` can be overridden by a custom `__ne__`.
    if ml_dur is not None and gt_dur is not None:
        success += 1
print("Success: ", success)
# arg_parser = create_arg_parser()
def main():
    """Train and/or evaluate a language model with MLM or token-discrimination loss.

    Validates the argument combination, sets up the device and (optionally)
    distributed training, loads config / tokenizer / model, trains when
    ``args.do_train`` is set (saving the result to ``args.output_dir`` and
    reloading it), and evaluates one or all checkpoints when ``args.do_eval``
    is set.

    Returns:
        dict: Evaluation results, with each metric name suffixed by
        ``_<global_step>`` (the suffix is empty when a single checkpoint is
        evaluated). Empty when no evaluation ran.

    Raises:
        ValueError: On inconsistent arguments (missing ``--mlm``-style flag
            for BERT-like models, ``--do_eval`` without an evaluation file,
            ``--should_continue`` without a checkpoint, a non-empty output
            directory without ``--overwrite_output_dir``, or no config /
            tokenizer source).
        NotImplementedError: When reloading a model with neither MLM nor
            token discrimination enabled.
    """
    args = get_args()

    uses_discrimination = args.token_discrimination or args.mask_token_discrimination

    if args.model_type in ["bert", "roberta", "distilbert", "camembert"] \
            and not (args.mlm or uses_discrimination):
        raise ValueError(
            "BERT and RoBERTa-like models do not have LM heads but masked LM heads. They must be run using the --mlm "
            "flag (masked language modeling).")
    if args.eval_data_file is None and args.do_eval:
        raise ValueError(
            "Cannot do evaluation without an evaluation data file. Either supply a file to --eval_data_file "
            "or remove the --do_eval argument.")
    if args.should_continue:
        sorted_checkpoints = _sorted_checkpoints(args)
        if not sorted_checkpoints:
            raise ValueError(
                "Used --should_continue but no checkpoint was found in --output_dir."
            )
        # Resume from the last entry returned by `_sorted_checkpoints`.
        args.model_name_or_path = sorted_checkpoints[-1]

    if (os.path.exists(args.output_dir) and os.listdir(args.output_dir)
            and args.do_train and not args.overwrite_output_dir):
        raise ValueError(
            "Output directory ({}) already exists and is not empty. Use --overwrite_output_dir to overcome."
            .format(args.output_dir))

    # Setup distant debugging if needed
    if args.server_ip and args.server_port:
        # Distant debugging - see https://code.visualstudio.com/docs/python/debugging#_attach-to-a-local-script
        import ptvsd

        print("Waiting for debugger attach")
        ptvsd.enable_attach(address=(args.server_ip, args.server_port),
                            redirect_output=True)
        ptvsd.wait_for_attach()

    # Setup CUDA, GPU & distributed training
    if args.local_rank == -1 or args.no_cuda:
        device = torch.device("cuda" if torch.cuda.is_available()
                              and not args.no_cuda else "cpu")
        args.n_gpu = 0 if args.no_cuda else torch.cuda.device_count()
    else:
        # Initializes the distributed backend which will take care of sychronizing nodes/GPUs
        torch.cuda.set_device(args.local_rank)
        device = torch.device("cuda", args.local_rank)
        torch.distributed.init_process_group(backend="nccl")
        args.n_gpu = 1
    args.device = device

    # Setup logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO if args.local_rank in [-1, 0] else logging.WARN,
    )
    logger.warning(
        "Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s",
        args.local_rank,
        device,
        args.n_gpu,
        bool(args.local_rank != -1),
        args.fp16,
    )

    # Set seed
    set_seed(args)

    # Load pretrained model and tokenizer
    if args.local_rank not in [-1, 0]:
        # Barrier to make sure only the first process in distributed training download model & vocab
        torch.distributed.barrier()

    if args.config_name:
        config = AutoConfig.from_pretrained(args.config_name,
                                            cache_dir=args.cache_dir)
    elif args.model_name_or_path:
        config = AutoConfig.from_pretrained(args.model_name_or_path,
                                            cache_dir=args.cache_dir)
    else:
        # When we release a pip version exposing CONFIG_MAPPING,
        # we can do `config = CONFIG_MAPPING[args.model_type]()`.
        # (Fixed: the two message halves used to join as "save it,and load it".)
        raise ValueError(
            "You are instantiating a new config instance from scratch. This is not supported, but you can do it from another script, save it, "
            "and load it from here, using --config_name")

    if args.tokenizer_name:
        tokenizer = AutoTokenizer.from_pretrained(args.tokenizer_name,
                                                  cache_dir=args.cache_dir)
    elif args.model_name_or_path:
        tokenizer = AutoTokenizer.from_pretrained(args.model_name_or_path,
                                                  cache_dir=args.cache_dir)
    else:
        raise ValueError(
            "You are instantiating a new tokenizer from scratch. This is not supported, but you can do it from another script, save it, "
            "and load it from here, using --tokenizer_name")

    if args.block_size <= 0:
        # Our input block size will be the max possible for the model
        args.block_size = tokenizer.max_len
    else:
        args.block_size = min(args.block_size, tokenizer.max_len)

    if args.model_name_or_path and uses_discrimination:
        model = RobertaForTokenDiscrimination.from_pretrained(
            args.model_name_or_path,
            from_tf=bool(".ckpt" in args.model_name_or_path),
            config=config,
            cache_dir=args.cache_dir,
        )
    elif args.model_name_or_path and args.mlm:
        model = AutoModelWithLMHead.from_pretrained(
            args.model_name_or_path,
            from_tf=bool(".ckpt" in args.model_name_or_path),
            config=config,
            cache_dir=args.cache_dir,
        )
    else:
        # NOTE(review): this branch also runs when neither --mlm nor a
        # discrimination flag is set; such a run trains and saves, then raises
        # NotImplementedError on the reload below -- confirm that is intended.
        logger.info("Training new model from scratch")
        model = AutoModelWithLMHead.from_config(config)
    model.to(args.device)

    if args.local_rank == 0:
        # End of barrier to make sure only the first process in distributed
        # training download model & vocab
        torch.distributed.barrier()

    logger.info("Training/evaluation parameters %s", args)

    # Training
    if args.do_train:
        if args.local_rank not in [-1, 0]:
            # Barrier to make sure only the first process in distributed training
            # process the dataset, and the others will use the cache
            torch.distributed.barrier()

        train_dataset = load_and_cache_examples(args, tokenizer, evaluate=False)

        if args.local_rank == 0:
            torch.distributed.barrier()

        global_step, tr_loss = train(args, train_dataset, model, tokenizer)
        logger.info(" global_step = %s, average loss = %s", global_step, tr_loss)

    # Saving best-practices: if you use save_pretrained for the model and tokenizer,
    # you can reload them using from_pretrained()
    if args.do_train and (args.local_rank == -1
                          or torch.distributed.get_rank() == 0):
        # Create output directory if needed
        if args.local_rank in [-1, 0]:
            os.makedirs(args.output_dir, exist_ok=True)

        logger.info("Saving model checkpoint to %s", args.output_dir)
        # Save a trained model, configuration and tokenizer using `save_pretrained()`.
        # They can then be reloaded using `from_pretrained()`
        # Take care of distributed/parallel training
        model_to_save = model.module if hasattr(model, "module") else model
        model_to_save.save_pretrained(args.output_dir)
        tokenizer.save_pretrained(args.output_dir)

        # Good practice: save your training arguments together with the trained model
        torch.save(args, os.path.join(args.output_dir, "training_args.bin"))

        # Load a trained model and vocabulary that you have fine-tuned
        model = _load_finetuned_model(args, args.output_dir)
        tokenizer = AutoTokenizer.from_pretrained(args.output_dir)
        model.to(args.device)

    # Evaluation
    results = {}
    if args.do_eval and args.local_rank in [-1, 0]:
        checkpoints = [args.output_dir]
        if args.eval_all_checkpoints:
            checkpoints = [
                os.path.dirname(c) for c in sorted(
                    glob.glob(args.output_dir + "/**/" + WEIGHTS_NAME,
                              recursive=True))
            ]
            logging.getLogger("transformers.modeling_utils").setLevel(
                logging.WARN)  # Reduce logging
        logger.info("Evaluate the following checkpoints: %s", checkpoints)
        for checkpoint in checkpoints:
            # The step suffix is only used when several checkpoints are compared.
            global_step = checkpoint.split("-")[-1] if len(checkpoints) > 1 else ""
            prefix = checkpoint.split("/")[-1] if checkpoint.find("checkpoint") != -1 else ""

            model = _load_finetuned_model(args, checkpoint)
            model.to(args.device)
            result = evaluate(args, model, tokenizer, prefix=prefix)
            results.update(
                {"{}_{}".format(k, global_step): v for k, v in result.items()})

    return results


def _load_finetuned_model(args, path):
    """Load the saved model at *path* with the class matching the training loss."""
    if args.mlm:
        return AutoModelWithLMHead.from_pretrained(path)
    if args.token_discrimination or args.mask_token_discrimination:
        return RobertaForTokenDiscrimination.from_pretrained(path)
    raise NotImplementedError(
        'only mlm and token discrimination loss supported')
from training_utils import train
import sys

MORGAN_FP_SIZE = 1024
MORGAN_FP_RADIUS = 2
N_EPISODES = 2000
EPISODE_LENGTH = 45
BOOTSTRAP_HEADS = 5

if __name__ == '__main__':
    # Usage: <script> {qed|penalized_logp|benchmark}
    # Maps the command-line objective name to its environment class.
    env_classes = {
        'qed': QEDMolEnv,
        'penalized_logp': PenalizedLogpEnv,
        'benchmark': BenchmarkEnv,
    }

    # Validate the argument before building anything; a missing argument is
    # reported like an unknown one instead of raising IndexError.
    objective = sys.argv[1] if len(sys.argv) > 1 else None
    if objective not in env_classes:
        print('BAD ARGS')
        sys.exit(1)

    agent = BootstrappedDQN(MORGAN_FP_SIZE,
                            MORGAN_FP_SIZE,
                            n_heads=BOOTSTRAP_HEADS)
    env = env_classes[objective]({'C', 'O', 'N', 'Cl'},
                                 max_steps=EPISODE_LENGTH)
    mfp = MorganFingerprintProvider(MORGAN_FP_SIZE, MORGAN_FP_RADIUS)

    # Materialise everything `train` yields before opening the output file.
    molecule_pool = list(train(env, agent, mfp, N_EPISODES, EPISODE_LENGTH))

    # One entry per line; `None` entries are skipped.
    with open('OUT_MOLS_%s.smiles' % objective, 'w') as f:
        f.writelines('%s\n' % m for m in molecule_pool if m is not None)
    print('DONE')
nn.ReLU(inplace=True), nn.BatchNorm2d(64), nn.Conv2d(64, 64, kernel_size=3, stride=1), nn.ReLU(inplace=True), nn.BatchNorm2d(64), nn.AdaptiveMaxPool2d(32), ## 32x32 Flatten(), nn.Linear(64*32*32, 1024), nn.ReLU(inplace=True), nn.Linear(1024, 17) ) model.type(dtype) loss_fn = nn.MultiLabelSoftMarginLoss().type(dtype) optimizer = optim.Adam(model.parameters(), lr=1e-3) ## don't load model params from file - instead retrain the model if not from_pickle: train(train_loader, model, loss_fn, optimizer, dtype, print_every=10) ## serialize model data and save as .pkl file torch.save(model.state_dict(), save_model_path) print("model saved as {}".format(os.path.abspath(save_model_path))) ## load model params from file else: state_dict = torch.load(save_model_path, map_location=lambda storage, loc: storage) model.load_state_dict(state_dict) print("model loaded from {}".format(os.path.abspath(save_model_path))) acc = validate_epoch(model, val_loader, dtype) print(acc)
## loader
train_loader = DataLoader(
    training_dataset,
    batch_size=256,
    shuffle=True,
    num_workers=4  # 0 for CUDA
)

## small convolutional model: two conv blocks followed by two linear layers
model = nn.Sequential(
    nn.Conv2d(4, 16, kernel_size=3, stride=1),
    nn.ReLU(inplace=True),
    nn.BatchNorm2d(16),
    nn.AdaptiveMaxPool2d(128),
    nn.Conv2d(16, 32, kernel_size=3, stride=1),
    nn.ReLU(inplace=True),
    nn.BatchNorm2d(32),
    nn.AdaptiveMaxPool2d(64),
    Flatten(),
    # 32 channels of 64x64 after the second adaptive pooling
    nn.Linear(32 * 64 * 64, 1024),
    nn.ReLU(inplace=True),
    nn.Linear(1024, 17))
model.type(dtype)

# NOTE(review): nn.BCELoss expects inputs in [0, 1], but the network ends in a
# plain nn.Linear. Unless `train` applies a sigmoid itself, this probably
# should be nn.BCEWithLogitsLoss -- confirm against `train`.
loss_fn = nn.BCELoss().type(dtype)
optimizer = optim.Adam(model.parameters(), lr=5e-2)

## don't load model params from file - instead retrain the model
if not from_pickle:
    train(train_loader, model, loss_fn, optimizer, dtype)
    ## serialize model data and save as .pkl file
    torch.save(model.state_dict(), save_model_path)
    # Fixed: the path is now passed to abspath; before, the function object
    # itself was formatted into the message.
    print("model saved as {}".format(os.path.abspath(save_model_path)))
## load model params from file
else:
    # map_location keeps the tensors on CPU storage while loading, so the
    # load does not need the device they were saved from; load_state_dict
    # then copies the values into the model's parameters.
    state_dict = torch.load(save_model_path,
                            map_location=lambda storage, loc: storage)
    model.load_state_dict(state_dict)
# Push the first training batch through `temp_model` to read off the output
# size; its second dimension becomes the input width of the first linear
# layer below.
for x, y in train_loader:
    x_var = Variable(x.type(dtype)).cuda()
    size = temp_model(x_var).size()
    break  # only the first batch is needed

model = nn.Sequential(
    nn.Conv2d(4, 16, kernel_size=3, stride=1),
    nn.ReLU(inplace=True),
    nn.BatchNorm2d(16),
    nn.AdaptiveMaxPool2d(128),
    nn.Conv2d(16, 32, kernel_size=3, stride=1),
    nn.ReLU(inplace=True),
    nn.BatchNorm2d(32),
    nn.AdaptiveMaxPool2d(64),
    Flatten(),
    nn.Linear(size[1], 1024),
    nn.ReLU(inplace=True),
    nn.Linear(1024, 17),
)
model.type(dtype)
model.train()

loss_fn = nn.MultiLabelSoftMarginLoss().type(dtype)
optimizer = optim.Adam(model.parameters(), lr=5e-2)

# Wait for pending CUDA work to finish before training starts.
torch.cuda.synchronize()
train(train_loader,
      model,
      loss_fn,
      optimizer,
      dtype,
      num_epochs=1,
      print_every=10)

# Save the trained weights, then read them straight back from disk into the
# model.
torch.save(model.state_dict(), save_model_path)
state_dict = torch.load(save_model_path)
model.load_state_dict(state_dict)
def main():
    """Run k-fold training of ``Dasnet`` with early stopping and report test metrics.

    For each of ``N_FOLDS`` folds a fresh model is trained for up to
    ``N_EPOCHS`` epochs. The weights of the epoch with the lowest validation
    total loss are kept; training stops early after ``MAX_ITER_NO_IMPROVE``
    epochs without improvement. The best weights are restored, saved, and
    evaluated on the fold's test generator. Per-fold test output is written
    to ``results.txt`` under ``RESULTS_WEIGHTS_PATH``, and the age / sex
    targets and predictions of all folds are concatenated and passed to
    ``save_results``.
    """
    import copy  # local import: needed only for snapshotting the best weights

    age_criterion = nn.MSELoss()
    sex_criterion = nn.BCELoss()

    # Accumulators for targets / predictions across all folds.
    age_pred = torch.empty(0).to(DEVICE)
    age_data = torch.empty(0).to(DEVICE)
    sex_pred = torch.empty(0).to(DEVICE)
    sex_data = torch.empty(0).to(DEVICE)

    # `with` guarantees the results file is closed even if a fold fails.
    with open(RESULTS_WEIGHTS_PATH.joinpath("results.txt"), "w") as results_file:
        for i in range(N_FOLDS):
            model = m.Dasnet().to(DEVICE)
            optimizer = torch.optim.Adadelta(model.parameters(),
                                             lr=1.0,
                                             rho=0.95,
                                             eps=1e-06)

            if FIXED_GROUPS:
                training_gen, eval_gen, test_gen = dat_ut.fixed_dataset_generator()
            else:
                # The original STATIC_TEST branch and the default branch made
                # the identical call, so they are merged here.
                training_gen, eval_gen, test_gen = dat_ut.kfold_generator_simple(i)

            # [total_loss, age_loss, sex_loss, avg_age_diff, avg_sex_diff];
            # -1 marks "no epoch evaluated yet".
            best_epoch_result = [-1, -1, -1, -1]
            best_epoch = 0
            best_epoch_model = None
            no_upgrade_cont = 0

            for k in range(1, N_EPOCHS + 1):
                train_ut.train(model, training_gen, age_criterion,
                               sex_criterion, optimizer)
                (_, _, _, _, total_loss, age_loss, sex_loss, avg_age_diff,
                 avg_sex_diff) = train_ut.validate(model, eval_gen,
                                                   age_criterion, sex_criterion)

                if best_epoch_result[0] >= total_loss or best_epoch_result[0] == -1:
                    best_epoch_result = [
                        total_loss, age_loss, sex_loss, avg_age_diff,
                        avg_sex_diff
                    ]
                    # state_dict() returns references to the live tensors, so
                    # without a deep copy the "best" snapshot would keep
                    # changing as training continues.
                    best_epoch_model = copy.deepcopy(model.state_dict())
                    best_epoch = k
                    no_upgrade_cont = 0

                if best_epoch_result[0] < total_loss:
                    no_upgrade_cont += 1

                if no_upgrade_cont == MAX_ITER_NO_IMPROVE:
                    print("UPGRADE FIN / EPOCH: {}".format(best_epoch),
                          file=results_file)
                    print("FINAL EPOCH: {}".format(k), file=results_file)
                    break

            # Restore the best weights (skipped if no epoch ran at all).
            if best_epoch_model is not None:
                model.load_state_dict(best_epoch_model)

            # NOTE(review): every fold writes the same file name, so only the
            # last fold's weights survive. The original called `.format(i)` on
            # a string with no placeholder, which suggests per-fold names were
            # intended -- confirm before changing the path other code may read.
            torch.save(model.state_dict(),
                       RESULTS_WEIGHTS_PATH.joinpath("model_weights.pth"))

            (age, age_out, sex, sex_out, total_test_loss, age_test_loss,
             sex_test_loss, avg_age_diff, avg_sex_diff) = train_ut.validate(
                 model, test_gen, age_criterion, sex_criterion, 'test')

            print(
                "TEST :: TOTAL LOSS = {} \nAGE_LOSS = {} / SEX_LOSS = {} \nAVG_AGE_DIFF = {} / AVG_SEX_DIFF = {}"
                .format(total_test_loss, age_test_loss, sex_test_loss,
                        avg_age_diff, avg_sex_diff),
                file=results_file)

            age_data = torch.cat((age_data, age), 0)
            age_pred = torch.cat((age_pred, age_out), 0)
            sex_data = torch.cat((sex_data, sex), 0)
            sex_pred = torch.cat((sex_pred, sex_out), 0)

            print_metrics(age, age_out, sex, sex_out, results_file)

        save_results(age_data, age_pred, sex_data, sex_pred)