Example #1
import ray
from ray.tune import run_experiments

# create_parser, create_hparams and RayModel are project-local helpers
# (in the PBA codebase they live in pba/setup.py and pba/model.py).


def main(_):
    FLAGS = create_parser("train")  # pylint: disable=invalid-name
    hparams = create_hparams("train", FLAGS)

    train_spec = {
        "run": RayModel,
        "resources_per_trial": {
            "cpu": FLAGS.cpu,
            "gpu": FLAGS.gpu
        },
        "stop": {
            "training_iteration": hparams.num_epochs,
        },
        "config": hparams.values(),
        "local_dir": FLAGS.local_dir,
        "checkpoint_freq": FLAGS.checkpoint_freq,
        "checkpoint_at_end": FLAGS.checkpoint_at_end,
        "num_samples": FLAGS.num_samples
    }

    if FLAGS.restore:
        train_spec["restore"] = FLAGS.restore

    ray.init()
    run_experiments({FLAGS.name: train_spec})
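For readers without the PBA codebase, here is a minimal self-contained sketch of the same run_experiments pattern, with a toy function trainable standing in for the project-local RayModel (toy_trainable, the experiment name, and the reported val_acc values are made up for illustration; the reporter-callback style matches the older Ray Tune releases these examples target):

import ray
from ray.tune import run_experiments


def toy_trainable(config, reporter):
    """Stand-in trainable: reports a fake validation accuracy each iteration."""
    acc = 0.0
    for _ in range(10):
        acc += 0.05
        reporter(val_acc=acc)  # training_iteration is counted automatically


ray.init()
run_experiments({
    "toy_experiment": {
        "run": toy_trainable,
        "stop": {"training_iteration": 5},
        "num_samples": 1,
    }
})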
Example #2
import os
import shutil

import ray
from ray.tune import run_experiments

# create_parser, create_hparams and RayModel are project-local (see Example #1).


def main(_):
    FLAGS = create_parser("train")  # pylint: disable=invalid-name
    hparams = create_hparams("train", FLAGS)

    train_spec = {
        "run": RayModel,
        "resources_per_trial": {
            "cpu": FLAGS.cpu,
            "gpu": FLAGS.gpu
        },
        "stop": {
            "training_iteration": hparams.num_epochs,
        },
        "config": hparams.values(),
        "local_dir": FLAGS.local_dir,
        "checkpoint_freq": FLAGS.checkpoint_freq,
        "num_samples": FLAGS.num_samples
    }

    if FLAGS.restore:
        train_spec["restore"] = FLAGS.restore

    ray.init(
        webui_host='127.0.0.1',
        # plasma_directory="/dev/shm/",
        # memory=1024 * 1024 * 1024 * 25,  # setting 25 GB for ray workers
        # object_store_memory=1024 * 1024 * 1024 * 5,  # setting 5 GB object store
        # lru_evict=True
    )

    # copy code to local_dir
    code_dir = os.path.join(os.path.abspath(os.getcwd()), 'pba')
    dst_dir = os.path.join(FLAGS.local_dir, FLAGS.name, 'pba')
    if os.path.exists(dst_dir):
        shutil.rmtree(dst_dir)  # remove old copy of code

    shutil.copytree(code_dir, dst_dir)

    run_experiments({FLAGS.name: train_spec})

    ray.shutdown()
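A note on the ray.init arguments above: webui_host and the commented-out memory, plasma_directory and lru_evict options belong to older Ray releases. On current Ray, the rough equivalents would look like the following sketch (dashboard_host replaced webui_host; the 5 GB figure is carried over from the comment above):

import ray

ray.init(
    dashboard_host="127.0.0.1",       # successor to webui_host
    object_store_memory=5 * 1024**3,  # 5 GB object store
)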
Example #3
import ray
from ray.tune import run_experiments

# create_parser, create_hparams and RayModel are project-local (see Example #1).


def main(_):
    args = create_parser()
    hparams = create_hparams("train", args)

    train_spec = {
        "run": RayModel,
        "resources_per_trial": {
            "cpu": args.cpu,
            "gpu": args.gpu
        },
        "stop": {
            "training_iteration": hparams.num_epochs,
        },
        "config": hparams.values(),
        "local_dir": args.local_dir,
        "checkpoint_freq": args.checkpoint_freq,
        "num_samples": 1
    }

    if args.restore:
        train_spec["restore"] = args.restore

    ray.init()
    run_experiments({args.train_name: train_spec})
Example #4
import random

import numpy as np
import ray
from ray.tune import run_experiments
from ray.tune.schedulers import PopulationBasedTraining

# create_parser, create_hparams and RayModel are project-local (see Example #1).


def main(_):
    FLAGS = create_parser("search")  # pylint: disable=invalid-name
    hparams = create_hparams("search", FLAGS)
    hparams_config = hparams.values()

    train_spec = {
        "run": RayModel,
        "resources_per_trial": {
            "cpu": FLAGS.cpu,
            "gpu": FLAGS.gpu
        },
        "stop": {
            "training_iteration": hparams.num_epochs,
        },
        "config": hparams_config,
        "local_dir": FLAGS.local_dir,
        "checkpoint_freq": FLAGS.checkpoint_freq,
        "num_samples": FLAGS.num_samples
    }

    if FLAGS.restore:
        train_spec["restore"] = FLAGS.restore

    def explore(config):
        """Custom explore function.

        Args:
            config: dictionary containing ray config params.

        Returns:
            The config dict with a freshly perturbed augmentation policy.
        """
        new_params = []
        if config["explore"] == "cifar10":
            for i, param in enumerate(config["hp_policy"]):
                if random.random() < 0.2:
                    if i % 2 == 0:
                        new_params.append(random.randint(0, 10))
                    else:
                        new_params.append(random.randint(0, 9))
                else:
                    amt = np.random.choice([0, 1, 2, 3],
                                           p=[0.25, 0.25, 0.25, 0.25])
                    # Cast np.int64 to int for py3 json
                    amt = int(amt)
                    if random.random() < 0.5:
                        new_params.append(max(0, param - amt))
                    else:
                        if i % 2 == 0:
                            new_params.append(min(10, param + amt))
                        else:
                            new_params.append(min(9, param + amt))
        else:
            raise ValueError("Unsupported explore dataset: {}".format(config["explore"]))
        config["hp_policy"] = new_params
        return config

    ray.init()

    pbt = PopulationBasedTraining(
        time_attr="training_iteration",
        reward_attr="val_acc",
        perturbation_interval=FLAGS.perturbation_interval,
        custom_explore_fn=explore,
        log_config=True)

    run_experiments({FLAGS.name: train_spec},
                    scheduler=pbt,
                    reuse_actors=True,
                    verbose=True)
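For context: the even/odd indexing in explore suggests that hp_policy is a flat list of alternating probability and magnitude integers, with probabilities in [0, 10] and magnitudes in [0, 9]. That encoding is an assumption read off the bounds above, not stated in the snippet; the policy values below are made up to illustrate it:

# Hypothetical policy: three augmentation ops as (probability, magnitude) pairs.
hp_policy = [3, 5, 0, 2, 10, 7]
pairs = list(zip(hp_policy[0::2], hp_policy[1::2]))
print(pairs)  # [(3, 5), (0, 2), (10, 7)]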
Example #5
import os
import random
import shutil

import numpy as np
import ray
from ray.tune import run_experiments
from ray.tune.schedulers import PopulationBasedTraining

# create_parser, create_hparams and RayModel are project-local (see Example #1).


def main(_):
    FLAGS = create_parser("search")  # pylint: disable=invalid-name
    hparams = create_hparams("search", FLAGS)
    hparams_config = hparams.values()

    train_spec = {
        "run": RayModel,
        "resources_per_trial": {
            "cpu": FLAGS.cpu,
            "gpu": FLAGS.gpu
        },
        "stop": {
            "training_iteration": hparams.num_epochs,
        },
        "config": hparams_config,
        "local_dir": FLAGS.local_dir,
        "checkpoint_freq": FLAGS.checkpoint_freq,
        "num_samples": FLAGS.num_samples
    }

    if FLAGS.restore:
        train_spec["restore"] = FLAGS.restore

    def explore(config):
        """
        Custom explore function based on cifar10 from PBA paper

        Args:
            config: dictionary containing ray config params.

        Returns:
        Copy of config with modified augmentation policy.
        """
        new_params = []

        for i, param in enumerate(config["hp_policy"]):
            if random.random() < 0.2:
                if i % 2 == 0:
                    new_params.append(random.randint(0, 10))
                else:
                    new_params.append(random.randint(0, 9))
            else:
                amt = np.random.choice([0, 1, 2, 3],
                                       p=[0.25, 0.25, 0.25, 0.25])
                # Cast np.int64 to int for py3 json
                amt = int(amt)
                if random.random() < 0.5:
                    new_params.append(max(0, param - amt))
                else:
                    if i % 2 == 0:
                        new_params.append(min(10, param + amt))
                    else:
                        new_params.append(min(9, param + amt))
        config["hp_policy"] = new_params
        return config

    ray.init(
        webui_host='127.0.0.1',
        # memory=1024 * 1024 * 1024 * 20,    # setting 20 GB for ray workers
        # object_store_memory=1024 * 1024 * 1024 * 30,
        # lru_evict=True
    )

    # copy code to local_dir
    code_dir = os.path.join(os.path.abspath(os.getcwd()), 'pba')
    dst_dir = os.path.join(FLAGS.local_dir, FLAGS.name, 'pba')
    if os.path.exists(dst_dir):
        shutil.rmtree(dst_dir)  # remove old copy of code

    shutil.copytree(code_dir, dst_dir)

    pbt = PopulationBasedTraining(
        time_attr="training_iteration",
        reward_attr="abs_rel_acc",  # older Ray Tune API (see the note after this example)
        # metric="abs_rel",
        # mode="min",
        perturbation_interval=FLAGS.perturbation_interval,
        custom_explore_fn=explore,
        quantile_fraction=0.50,
        log_config=True)

    run_experiments({FLAGS.name: train_spec},
                    scheduler=pbt,
                    reuse_actors=True,
                    verbose=True)

    ray.shutdown()
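reward_attr is the scheduler argument from older Ray Tune releases; as the commented-out metric/mode lines above hint, newer versions split it into a metric name plus an optimization mode. A sketch of the same scheduler construction against the newer API (the perturbation_interval value is a placeholder for FLAGS.perturbation_interval, and explore is the function defined above; check the signature against your installed Ray version):

from ray.tune.schedulers import PopulationBasedTraining

pbt = PopulationBasedTraining(
    time_attr="training_iteration",
    metric="abs_rel_acc",      # replaces reward_attr
    mode="max",                # reward_attr was always maximized
    perturbation_interval=3,   # placeholder value
    custom_explore_fn=explore,
    quantile_fraction=0.5,
    log_config=True)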