Example #1
0
def main(args, unknown_args):
    """Build a ``Trainer`` for the algorithm named by ``args.algorithm`` and run it.

    Steps, in order: resolve ``args``/``config`` through ``parse_args_uargs``,
    create ``args.logdir`` and hand the config to ``save_config``, optionally
    call ``prepare_modules`` for ``args.expdir``, obtain trainer kwargs from
    the algorithm's ``prepare_for_trainer``, create a ``StrictRedis`` client,
    construct the ``Trainer`` and call its ``run`` method.

    An ``atexit`` hook terminates every process returned by
    ``trainer.get_processes()`` when the interpreter exits.

    Args:
        args: command-line arguments object, forwarded to
            ``parse_args_uargs``.
        unknown_args: additional arguments, forwarded to
            ``parse_args_uargs``.
    """
    args, config = parse_args_uargs(args, unknown_args, dump_config=True)

    logdir = args.logdir
    os.makedirs(logdir, exist_ok=True)
    save_config(config=config, logdir=logdir)

    expdir = args.expdir
    if expdir is not None:
        # The result is not used afterwards; the call is made for its effects.
        modules = prepare_modules(expdir=expdir, dump_dir=logdir)  # noqa: F841

    algorithm_kwargs = Registry.get_fn(
        "algorithm", args.algorithm).prepare_for_trainer(config)

    # Redis settings are optional: port falls back to 12000, prefix to "".
    redis_config = config.get("redis", {})
    redis_server = StrictRedis(port=redis_config.get("port", 12000))
    redis_prefix = redis_config.get("prefix", "")

    trainer_config = config["trainer"]
    pprint(trainer_config)
    pprint(algorithm_kwargs)

    trainer = Trainer(
        logdir=logdir,
        redis_server=redis_server,
        redis_prefix=redis_prefix,
        **trainer_config,
        **algorithm_kwargs)

    pprint(trainer)

    def _terminate_workers():
        # Stop whatever processes the trainer reports at interpreter exit.
        for worker in trainer.get_processes():
            worker.terminate()

    atexit.register(_terminate_workers)

    trainer.run()
Example #2
0
def main(args, unknown_args):
    """Build a ``Trainer`` for the algorithm named by ``args.algorithm`` and run it.

    Steps, in order: resolve ``args``/``config`` through ``parse_args_uargs``,
    create ``args.logdir`` and hand the config to ``save_config``, optionally
    call ``prepare_modules`` for ``args.expdir``, optionally fill
    ``config["shared"]`` with sizes read from a temporary environment
    instance, obtain trainer kwargs from the algorithm's
    ``prepare_for_trainer``, create a ``StrictRedis`` client, construct the
    ``Trainer`` and call its ``run`` method.

    An ``atexit`` hook terminates every process returned by
    ``trainer.get_processes()`` when the interpreter exits.

    Args:
        args: command-line arguments object, forwarded to
            ``parse_args_uargs``.
        unknown_args: additional arguments, forwarded to
            ``parse_args_uargs``.
    """
    args, config = parse_args_uargs(args, unknown_args, dump_config=True)

    logdir = args.logdir
    os.makedirs(logdir, exist_ok=True)
    save_config(config=config, logdir=logdir)

    expdir = args.expdir
    if expdir is not None:
        # The result is not used afterwards; the call is made for its effects.
        modules = prepare_modules(expdir=expdir, dump_dir=logdir)  # noqa: F841

    algorithm = Registry.get_fn("algorithm", args.algorithm)

    env_name = args.environment
    if env_name is not None:
        # @TODO: remove this hack (the whole rl part is due for a refactor).
        # An environment is instantiated only to read the first entry of its
        # observation/action shapes into the shared config section.
        make_env = Registry.get_fn("environment", env_name)
        probe_env = make_env(**config["env"])
        observation_size = probe_env.observation_shape[0]
        config["shared"]["observation_size"] = observation_size
        action_size = probe_env.action_shape[0]
        config["shared"]["action_size"] = action_size
        del probe_env

    algorithm_kwargs = algorithm.prepare_for_trainer(config)

    # Redis settings are optional: port falls back to 12000, prefix to "".
    redis_config = config.get("redis", {})
    redis_server = StrictRedis(port=redis_config.get("port", 12000))
    redis_prefix = redis_config.get("prefix", "")

    trainer_config = config["trainer"]
    pprint(trainer_config)
    pprint(algorithm_kwargs)

    trainer = Trainer(
        logdir=logdir,
        redis_server=redis_server,
        redis_prefix=redis_prefix,
        **trainer_config,
        **algorithm_kwargs)

    pprint(trainer)

    def _terminate_workers():
        # Stop whatever processes the trainer reports at interpreter exit.
        for worker in trainer.get_processes():
            worker.terminate()

    atexit.register(_terminate_workers)

    trainer.run()
Example #3
0
def main(args, unknown_args):
    """Start the requested sampler processes and wait for them to finish.

    After resolving ``args``/``config`` through ``parse_args_uargs`` and
    preparing ``args.logdir``, the function launches ``run_sampler`` in
    separate processes:

    * ``args.vis`` samplers with ``vis=True`` and zero noise probabilities,
    * ``args.infer`` samplers with ``infer=True`` and zero noise
      probabilities,
    * ``args.train`` samplers whose action/parameter noise grows linearly
      with the sampler index up to ``args.max_action_noise`` /
      ``args.max_param_noise`` (``None`` when the maximum is ``None``).

    When ``args.debug`` is set, one sampler is first run synchronously in
    the current process with fixed noise of 0.5; it does not consume a
    sampler id.

    Every spawned process receives a distinct, increasing ``id``. An
    ``atexit`` hook terminates all spawned processes at interpreter exit.

    Args:
        args: command-line arguments object, forwarded to
            ``parse_args_uargs``.
        unknown_args: additional arguments, forwarded to
            ``parse_args_uargs``.
    """
    args, config = parse_args_uargs(args, unknown_args)

    os.makedirs(args.logdir, exist_ok=True)
    save_config(config=config, logdir=args.logdir)
    if args.expdir is not None:
        modules = prepare_modules(  # noqa: F841
            expdir=args.expdir, dump_dir=args.logdir)

    algorithm = Registry.get_fn("algorithm", args.algorithm)
    environment = Registry.get_fn("environment", args.environment)

    processes = []
    sampler_id = 0

    def on_exit():
        for p in processes:
            p.terminate()

    # Registered before anything is spawned so that children are cleaned up
    # even if a later launch step fails.
    atexit.register(on_exit)

    # Arguments shared by every sampler.
    params = dict(logdir=args.logdir,
                  algorithm=algorithm,
                  environment=environment,
                  config=config,
                  resume=args.resume,
                  redis=args.redis)

    def spawn_sampler(**sampler_params):
        """Start one sampler process with the next free sampler id."""
        nonlocal sampler_id
        p = mp.Process(
            target=run_sampler,
            kwargs=dict(**params, **sampler_params, id=sampler_id))
        p.start()
        processes.append(p)
        sampler_id += 1

    if args.debug:
        # Runs in the current process; sampler_id is intentionally left as is.
        run_sampler(
            **params,
            vis=False,
            infer=False,
            action_noise=0.5,
            param_noise=0.5,
            action_noise_prob=args.action_noise_prob,
            param_noise_prob=args.param_noise_prob,
            id=sampler_id,
        )

    for _ in range(args.vis):
        # Visualization samplers must be started with vis=True; with
        # vis=False the ``args.vis`` count would have no visible effect.
        spawn_sampler(
            vis=True,
            infer=False,
            action_noise_prob=0,
            param_noise_prob=0,
        )

    for _ in range(args.infer):
        spawn_sampler(
            vis=False,
            infer=True,
            action_noise_prob=0,
            param_noise_prob=0,
        )

    for i in range(1, args.train + 1):
        # Noise scales linearly from max/train (first sampler) to max (last).
        action_noise = args.max_action_noise * i / args.train \
            if args.max_action_noise is not None \
            else None
        param_noise = args.max_param_noise * i / args.train \
            if args.max_param_noise is not None \
            else None
        spawn_sampler(
            vis=False,
            infer=False,
            action_noise=action_noise,
            param_noise=param_noise,
            action_noise_prob=args.action_noise_prob,
            param_noise_prob=args.param_noise_prob,
        )

    for p in processes:
        p.join()