def main(_):
    """Entry point for training: build the ray.tune experiment spec and run it."""
    FLAGS = create_parser("train")  # pylint: disable=invalid-name
    hparams = create_hparams("train", FLAGS)

    # Assemble the ray.tune experiment specification from the parsed flags.
    spec = {
        "run": RayModel,
        "resources_per_trial": {"cpu": FLAGS.cpu, "gpu": FLAGS.gpu},
        "stop": {"training_iteration": hparams.num_epochs},
        "config": hparams.values(),
        "local_dir": FLAGS.local_dir,
        "checkpoint_freq": FLAGS.checkpoint_freq,
        "checkpoint_at_end": FLAGS.checkpoint_at_end,
        "num_samples": FLAGS.num_samples,
    }
    # Resume from a checkpoint only when one was requested.
    if FLAGS.restore:
        spec["restore"] = FLAGS.restore

    ray.init()
    run_experiments({FLAGS.name: spec})
def main(_):
    """Entry point for training: snapshot the code, then launch the experiment."""
    FLAGS = create_parser("train")  # pylint: disable=invalid-name
    hparams = create_hparams("train", FLAGS)

    experiment = {
        "run": RayModel,
        "resources_per_trial": {"cpu": FLAGS.cpu, "gpu": FLAGS.gpu},
        "stop": {"training_iteration": hparams.num_epochs},
        "config": hparams.values(),
        "local_dir": FLAGS.local_dir,
        "checkpoint_freq": FLAGS.checkpoint_freq,
        "num_samples": FLAGS.num_samples,
    }
    if FLAGS.restore:
        experiment["restore"] = FLAGS.restore

    # Bind the Ray dashboard to localhost only.
    ray.init(webui_host="127.0.0.1")

    # Copy the 'pba' package next to the experiment results so the run keeps a
    # snapshot of the code that produced it; any stale copy is replaced first.
    src = os.path.join(os.path.abspath(os.getcwd()), "pba")
    dst = os.path.join(FLAGS.local_dir, FLAGS.name, "pba")
    if os.path.exists(dst):
        shutil.rmtree(dst)
    shutil.copytree(src, dst)

    run_experiments({FLAGS.name: experiment})
    ray.shutdown()
def main(_):
    """Entry point: train a single model configuration with ray.tune."""
    args = create_parser()  # pylint: disable=invalid-name
    hparams = create_hparams("train", args)

    experiment = dict(
        run=RayModel,
        resources_per_trial={"cpu": args.cpu, "gpu": args.gpu},
        stop={"training_iteration": hparams.num_epochs},
        config=hparams.values(),
        local_dir=args.local_dir,
        checkpoint_freq=args.checkpoint_freq,
        num_samples=1,  # a single trial; no population here
    )
    if args.restore:
        experiment["restore"] = args.restore

    ray.init()
    run_experiments({args.train_name: experiment})
def main(_):
    """Entry point for policy search: run PopulationBasedTraining over the
    augmentation hyperparameter schedule.
    """
    FLAGS = create_parser("search")  # pylint: disable=invalid-name
    hparams = create_hparams("search", FLAGS)
    hparams_config = hparams.values()

    train_spec = {
        "run": RayModel,
        "resources_per_trial": {"cpu": FLAGS.cpu, "gpu": FLAGS.gpu},
        "stop": {"training_iteration": hparams.num_epochs},
        "config": hparams_config,
        "local_dir": FLAGS.local_dir,
        "checkpoint_freq": FLAGS.checkpoint_freq,
        "num_samples": FLAGS.num_samples,
    }
    if FLAGS.restore:
        train_spec["restore"] = FLAGS.restore

    def explore(config):
        """Custom explore function.

        Each entry of config["hp_policy"] is either resampled uniformly
        (probability 0.2) or shifted by a random amount in 0-3, clamped to
        [0, 10] for even indices and [0, 9] for odd indices.

        Args:
            config: dictionary containing ray config params.

        Returns:
            Copy of config with modified augmentation policy.

        Raises:
            ValueError: if config["explore"] names an unknown explore scheme.
        """
        new_params = []
        if config["explore"] == "cifar10":
            for i, param in enumerate(config["hp_policy"]):
                if random.random() < 0.2:
                    # Resample this parameter from scratch.
                    if i % 2 == 0:
                        new_params.append(random.randint(0, 10))
                    else:
                        new_params.append(random.randint(0, 9))
                else:
                    amt = np.random.choice(
                        [0, 1, 2, 3], p=[0.25, 0.25, 0.25, 0.25])
                    # Cast np.int64 to int for py3 json
                    amt = int(amt)
                    if random.random() < 0.5:
                        new_params.append(max(0, param - amt))
                    else:
                        if i % 2 == 0:
                            new_params.append(min(10, param + amt))
                        else:
                            new_params.append(min(9, param + amt))
        else:
            # Fail loudly with the offending value instead of a bare ValueError.
            raise ValueError(
                "unknown explore scheme: {!r}".format(config["explore"]))
        config["hp_policy"] = new_params
        return config

    ray.init()
    pbt = PopulationBasedTraining(
        time_attr="training_iteration",
        reward_attr="val_acc",
        perturbation_interval=FLAGS.perturbation_interval,
        custom_explore_fn=explore,
        log_config=True)
    run_experiments(
        {FLAGS.name: train_spec},
        scheduler=pbt,
        reuse_actors=True,
        verbose=True)
def main(_):
    """Entry point for policy search: snapshot the code, then run PBT with
    ray.tune over the augmentation schedule.
    """
    FLAGS = create_parser("search")  # pylint: disable=invalid-name
    hparams = create_hparams("search", FLAGS)

    experiment = {
        "run": RayModel,
        "resources_per_trial": {"cpu": FLAGS.cpu, "gpu": FLAGS.gpu},
        "stop": {"training_iteration": hparams.num_epochs},
        "config": hparams.values(),
        "local_dir": FLAGS.local_dir,
        "checkpoint_freq": FLAGS.checkpoint_freq,
        "num_samples": FLAGS.num_samples,
    }
    if FLAGS.restore:
        experiment["restore"] = FLAGS.restore

    def explore(config):
        """Custom explore function based on cifar10 from PBA paper.

        Each policy entry is either resampled uniformly (probability 0.2) or
        nudged up/down by 0-3, clamped to [0, 10] for even indices and
        [0, 9] for odd indices.

        Args:
            config: dictionary containing ray config params.

        Returns:
            Copy of config with modified augmentation policy.
        """
        mutated = []
        for idx, value in enumerate(config["hp_policy"]):
            upper = 10 if idx % 2 == 0 else 9
            if random.random() < 0.2:
                mutated.append(random.randint(0, upper))
            else:
                step = np.random.choice(
                    [0, 1, 2, 3], p=[0.25, 0.25, 0.25, 0.25])
                # Cast np.int64 to int for py3 json
                step = int(step)
                if random.random() < 0.5:
                    mutated.append(max(0, value - step))
                else:
                    mutated.append(min(upper, value + step))
        config["hp_policy"] = mutated
        return config

    # Bind the Ray dashboard to localhost only.
    ray.init(webui_host="127.0.0.1")

    # Snapshot the 'pba' package into the results directory, replacing any
    # stale copy from a previous run.
    src = os.path.join(os.path.abspath(os.getcwd()), "pba")
    dst = os.path.join(FLAGS.local_dir, FLAGS.name, "pba")
    if os.path.exists(dst):
        shutil.rmtree(dst)
    shutil.copytree(src, dst)

    scheduler = PopulationBasedTraining(
        time_attr="training_iteration",
        reward_attr="abs_rel_acc",
        perturbation_interval=FLAGS.perturbation_interval,
        custom_explore_fn=explore,
        quantile_fraction=0.50,
        log_config=True)
    run_experiments(
        {FLAGS.name: experiment},
        scheduler=scheduler,
        reuse_actors=True,
        verbose=True)
    ray.shutdown()