def optimize(cfg):
    """Run a Ray Tune search over ``ae_lr`` and print the best configuration.

    ``cfg.worker`` is forwarded to ``train`` as its ``params`` argument via
    ``tune.with_parameters``. After the run, the searcher state is written
    to ``./opt_checkpoint.pkl``.
    """
    search_space = {
        "ae_lr": tune.loguniform(1e-6, 1e-2),
    }

    # Points to evaluate
    best_params = [{"ae_lr": 3e-4}]
    search_alg = HyperOptSearch(
        metric="val_return",
        mode="max",
        points_to_evaluate=best_params,
    )

    analysis = tune.run(
        tune.with_parameters(train, params=cfg.worker),
        num_samples=1,
        config=search_space,
        # The searcher has to be handed to Tune; otherwise the seeded
        # starting point is ignored and the state saved below is that of a
        # searcher that never took part in the run.
        search_alg=search_alg,
        resources_per_trial={
            'cpu': 4,
            'gpu': 1,
        },
    )
    search_alg.save("./opt_checkpoint.pkl")

    # NOTE(review): the searcher optimizes "val_return" while the best config
    # is selected by "return" -- confirm that `train` reports both metrics
    # and that this difference is intentional.
    print("best config: ", analysis.get_best_config(metric="return", mode="max"))
def hyperopt(source_dir, only, target_imgs, target_annotations, eval_imgs,
             eval_annotations, masks, batch_size, dataloader_workers, epochs,
             samples, name, load_gan, load, load_algo, out_dir):
    '''
    Optimize DIHE hyperparameters.

    Runs a Ray Tune experiment with a HyperOpt searcher and an ASHA
    scheduler, saves the searcher state to ``<out_dir>/<name>_search.pkl``
    and prints the best result for each value of ``batchnorm``.

    Parameters handled here (all others are forwarded unchanged to
    ``hyperopt.dihe``; ``only`` is forwarded as ``source_only``):

    dataloader_workers -- forwarded, and also used to request
        ``dataloader_workers + 1`` CPUs per trial.
    epochs -- forwarded, and also used as ``max_t`` of the scheduler.
    samples -- number of configurations to sample (``num_samples``).
    name -- experiment name, also the prefix of the saved searcher file.
    load -- passed to ``tune.run`` as ``resume``.
    load_algo -- if not None, searcher state is restored from this path.
    out_dir -- directory the searcher state is saved into.
    '''
    config = {
        'batchnorm': tune.choice([True, False]),
        'multiplier': tune.uniform(0.5, 0.99999),
        'enc_lr': tune.uniform(1e-9, 1e-3),
    }

    algo = HyperOptSearch()
    if load_algo is not None:
        algo.restore(load_algo)

    scheduler = ASHAScheduler(max_t=epochs)

    # NOTE(review): `hyperopt.dihe` is looked up through the name `hyperopt`,
    # which is also this function's own name -- confirm that at call time the
    # name resolves to the object that provides `dihe`.
    result = tune.run(
        partial(
            hyperopt.dihe,
            source_dir=source_dir,
            target_imgs=target_imgs,
            target_annotations=target_annotations,
            eval_imgs=eval_imgs,
            eval_annotations=eval_annotations,
            load_gan=load_gan,
            masks=masks,
            source_only=only,
            target_skip=SKU110K_SKIP,
            eval_only=GP_TEST_VALIDATION_SET_SIZE,
            batch_size=batch_size,
            dataloader_workers=dataloader_workers,
            epochs=epochs,
        ),
        name=name,
        metric='accuracy',
        mode='max',
        resources_per_trial={'gpu': 1, 'cpu': dataloader_workers + 1},
        config=config,
        num_samples=samples,
        scheduler=scheduler,
        search_alg=algo,
        resume=load,
    )

    algo.save(os.path.join(out_dir, f'{name}_search.pkl'))

    df = result.results_df
    # The summary must not crash after a long search: a batchnorm value may
    # have no trials at all, or only trials that never reported accuracy.
    reportable = {'config.batchnorm', 'accuracy'} <= set(df.columns)
    for batchnorm in (True, False):
        scores = None
        if reportable:
            matching = df[df['config.batchnorm'] == batchnorm]
            scores = matching['accuracy'].dropna()
        if scores is None or scores.empty:
            print(f'Best with batchnorm={batchnorm}: no trial reported accuracy')
        else:
            print(f'Best with batchnorm={batchnorm}: {matching.loc[scores.idxmax()]}')
        print()
# Decide where Tune writes its results. When a restore path is given, the
# searcher state is reloaded from the previous experiment first and the new
# results go to that same directory.
if not args.restorePath:
    local_dir = args.rayResult
else:
    path = "/".join([args.restorePath, "experiment", args.restoreFile])
    print('Restore from ' + path)
    hyperopt.restore(path)
    local_dir = args.restorePath
    print("Training logs will be saved to " + local_dir)

# Limit how many trials the searcher may have in flight at once.
hyperopt_limited = ConcurrencyLimiter(
    hyperopt,
    max_concurrent=args.max_concurrent,
)

trainable = DistributedTrainableCreator(
    training_initialization(),
    num_slots=int(args.numGPU),
    use_gpu=True,
)

analysis = tune.run(
    trainable,
    #resources_per_trial=resources,
    name='experiment',
    local_dir=local_dir,
    config=config,
    num_samples=int(args.numHparams),
    search_alg=hyperopt_limited,
    scheduler=asha,
)

# Persist the searcher state next to the experiment results so that a later
# run can restore it.
searcher_state_path = "/".join([local_dir, "experiment", "searcher_state.pkl"])
print("Searcher_state is saved to " + searcher_state_path)
hyperopt.save(searcher_state_path)
def hyperopt(imgs, annotations, eval_annotations, name, batch_size,
             dataloader_workers, epochs, samples, load, load_algo, out_dir):
    '''
    Optimize GLN hyperparameters.

    Runs a Ray Tune experiment with a HyperOpt searcher and an ASHA
    scheduler, saves the searcher state to ``<out_dir>/<name>_search.pkl``
    and prints the best result for each value of ``tanh``.

    Parameters handled here (all others are forwarded unchanged to
    ``hyperopt.gln``):

    dataloader_workers -- forwarded, and also used to request
        ``dataloader_workers + 1`` CPUs per trial.
    epochs -- forwarded, and also used as ``max_t`` of the scheduler.
    samples -- number of configurations to sample (``num_samples``).
    name -- experiment name, also the prefix of the saved searcher file.
    load -- passed to ``tune.run`` as ``resume``.
    load_algo -- if not None, searcher state is restored from this path.
    out_dir -- directory the searcher state is saved into.
    '''
    config = {
        'tanh': tune.choice([True, False]),
        'multiplier': tune.uniform(0.8, 0.99999),
        'scale_class': tune.uniform(0.1, 10),
        'scale_gaussian': tune.uniform(0.1, 100),
        'gauss_loss_neg_thresh': 0,
        'gauss_loss_pos_thresh': tune.uniform(0, 1),
    }

    # Starting points: one configuration per value of `tanh`.
    initial_configs = [
        {
            'tanh': True,
            'multiplier': 0.99,
            'scale_class': 1,
            'scale_gaussian': 1,
            'gauss_loss_neg_thresh': 0,
            'gauss_loss_pos_thresh': 0.1,
        },
        {
            'tanh': False,
            'multiplier': 0.99,
            'scale_class': 1,
            'scale_gaussian': 1,
            'gauss_loss_neg_thresh': 0,
            'gauss_loss_pos_thresh': 0.1,
        },
    ]

    # The starting points are only used for a fresh search, i.e. when neither
    # the experiment nor the searcher state is being loaded.
    algo = HyperOptSearch(
        points_to_evaluate=initial_configs if not load and load_algo is None else None
    )
    if load_algo is not None:
        algo.restore(load_algo)

    scheduler = ASHAScheduler(max_t=epochs, grace_period=2)

    # NOTE(review): `hyperopt.gln` is looked up through the name `hyperopt`,
    # which is also this function's own name -- confirm that at call time the
    # name resolves to the object that provides `gln`.
    result = tune.run(
        partial(
            hyperopt.gln,
            imgs=imgs,
            annotations=annotations,
            eval_annotations=eval_annotations,
            skip=SKU110K_SKIP,
            batch_size=batch_size,
            dataloader_workers=dataloader_workers,
            epochs=epochs,
        ),
        name=name,
        metric='average_precision',
        mode='max',
        resources_per_trial={
            'gpu': 1,
            'cpu': dataloader_workers + 1,
        },
        max_failures=2,  # Single-GPU training of GLN is prone to exploding gradients
        raise_on_failed_trial=False,
        config=config,
        num_samples=samples,
        scheduler=scheduler,
        search_alg=algo,
        resume=load,
    )

    algo.save(os.path.join(out_dir, f'{name}_search.pkl'))

    df = result.results_df
    # Failed trials are tolerated above, so some (or all) trials for a given
    # `tanh` value may never have reported average_precision. The summary
    # must not crash in that case.
    reportable = {'config.tanh', 'average_precision'} <= set(df.columns)
    for tanh in (True, False):
        scores = None
        if reportable:
            matching = df[df['config.tanh'] == tanh]
            scores = matching['average_precision'].dropna()
        if scores is None or scores.empty:
            print(f'Best with tanh={tanh}: no trial reported average_precision')
        else:
            print(f'Best with tanh={tanh}: {matching.loc[scores.idxmax()]}')
        print()
# Continue from a previously saved searcher state when the checkpoint file
# is present.
if os.path.isfile(hyperopt_cp):
    print("Restore Hyperopt from checkpoint: ", hyperopt_cp)
    hyperopt.restore(hyperopt_cp)

# Wrap the searcher in a Repeater with repeat=NFOLDS.
re_search_alg = Repeater(hyperopt, repeat=NFOLDS)

# Hand the searcher to the training module.
from moa_utils.main import set_hyperopt
set_hyperopt(hyperopt)

ahb = AsyncHyperBandScheduler(
    metric="valid_loss",
    mode="min",
    time_attr="training_iteration",
    max_t=100,
    grace_period=5,
)

tune.run(
    run_training,
    # config=config,
    name="hyperopt_run_2",
    local_dir="./ray_results",
    scheduler=ahb,
    search_alg=re_search_alg,
    num_samples=NFOLDS*60,
    #stop={"training_iteration": 5},
    resources_per_trial={"cpu": 1},
)

# Store the searcher state so the next run can pick up from here.
hyperopt.save(hyperopt_cp)
# %% [code]