コード例 #1
0
def main():
    """Parse CLI options and launch asynchronous A3C training on a
    VizDoom scenario via ``run_a3c.run_a3c``.
    """
    import logging
    logging.basicConfig(level=logging.DEBUG)

    parser = argparse.ArgumentParser()
    # Positional: number of asynchronous worker processes.
    parser.add_argument('processes', type=int)
    parser.add_argument('--seed', type=int, default=None)
    parser.add_argument('--outdir', type=str, default=None)
    parser.add_argument('--scenario', type=str, default='basic')
    parser.add_argument('--t-max', type=int, default=5)
    parser.add_argument('--beta', type=float, default=1e-2)
    parser.add_argument('--profile', action='store_true')
    parser.add_argument('--steps', type=int, default=8 * 10 ** 7)
    parser.add_argument('--lr', type=float, default=7e-4)
    parser.add_argument('--eval-frequency', type=int, default=10 ** 5)
    parser.add_argument('--eval-n-runs', type=int, default=10)
    # store_true actions already default to False, so no set_defaults
    # calls are needed for these flags.
    parser.add_argument('--use-lstm', action='store_true')
    parser.add_argument('--window-visible', action='store_true')
    args = parser.parse_args()

    # Seeding is optional; only applied when --seed is given.
    if args.seed is not None:
        random_seed.set_random_seed(args.seed)

    # Simultaneously launching multiple vizdoom processes makes the program
    # get stuck, so serialize environment creation with a global lock.
    env_lock = mp.Lock()

    def make_env(process_idx, test):
        with env_lock:
            return doom_env.DoomEnv(window_visible=args.window_visible,
                                    scenario=args.scenario)

    # NOTE(review): hard-coded action count regardless of --scenario;
    # presumably matches the 'basic' scenario — confirm for others.
    n_actions = 3

    def model_opt():
        # Factory handed to run_a3c; builds a fresh model/optimizer pair.
        if args.use_lstm:
            model = A3CLSTM(n_actions)
        else:
            model = A3CFF(n_actions)
        opt = rmsprop_async.RMSpropAsync(lr=args.lr, eps=1e-1, alpha=0.99)
        opt.setup(model)
        opt.add_hook(chainer.optimizer.GradientClipping(40))
        return model, opt

    run_a3c.run_a3c(args.processes,
                    make_env,
                    model_opt,
                    phi,
                    t_max=args.t_max,
                    beta=args.beta,
                    profile=args.profile,
                    steps=args.steps,
                    eval_frequency=args.eval_frequency,
                    eval_n_runs=args.eval_n_runs,
                    args=args)
コード例 #2
0
ファイル: train_a3c_doom.py プロジェクト: BlGene/async-rl
def main():
    """Parse command-line options and launch asynchronous A3C training
    on a VizDoom scenario via ``run_a3c.run_a3c``.
    """
    import logging
    logging.basicConfig(level=logging.DEBUG)

    parser = argparse.ArgumentParser()
    # Positional: number of asynchronous worker processes.
    parser.add_argument('processes', type=int)
    parser.add_argument('--seed', type=int, default=None)
    parser.add_argument('--outdir', type=str, default=None)
    parser.add_argument('--scenario', type=str, default='basic')
    parser.add_argument('--t-max', type=int, default=5)
    parser.add_argument('--beta', type=float, default=1e-2)
    parser.add_argument('--profile', action='store_true')
    parser.add_argument('--steps', type=int, default=8 * 10 ** 7)
    parser.add_argument('--lr', type=float, default=7e-4)
    parser.add_argument('--eval-frequency', type=int, default=10 ** 5)
    parser.add_argument('--eval-n-runs', type=int, default=10)
    parser.add_argument('--use-lstm', action='store_true')
    parser.add_argument('--window-visible', action='store_true')
    parser.set_defaults(window_visible=False)
    parser.set_defaults(use_lstm=False)
    args = parser.parse_args()

    # Seeding is optional; only applied when --seed is given.
    if args.seed is not None:
        random_seed.set_random_seed(args.seed)

    # Simultaneously launching multiple vizdoom processes makes program stuck,
    # so use the global lock
    env_lock = mp.Lock()

    def make_env(process_idx, test):
        # Serialize environment construction across worker processes.
        with env_lock:
            return doom_env.DoomEnv(window_visible=args.window_visible,
                                    scenario=args.scenario)

    # NOTE(review): hard-coded action count regardless of --scenario;
    # presumably matches the 'basic' scenario — confirm for others.
    n_actions = 3

    def model_opt():
        # Factory handed to run_a3c; builds a fresh model/optimizer pair.
        if args.use_lstm:
            model = A3CLSTM(n_actions)
        else:
            model = A3CFF(n_actions)
        opt = rmsprop_async.RMSpropAsync(lr=args.lr, eps=1e-1, alpha=0.99)
        opt.setup(model)
        opt.add_hook(chainer.optimizer.GradientClipping(40))
        return model, opt

    run_a3c.run_a3c(args.processes, make_env, model_opt, phi, t_max=args.t_max,
                    beta=args.beta, profile=args.profile, steps=args.steps,
                    eval_frequency=args.eval_frequency,
                    eval_n_runs=args.eval_n_runs, args=args)
コード例 #3
0
ファイル: demo_a3c_doom.py プロジェクト: xjwxjw/async-rl
def main():
    """Evaluate a trained A3C agent (or a random policy, with --random)
    on a VizDoom scenario and print per-run and average scores.
    """
    import logging
    logging.basicConfig(level=logging.DEBUG)

    parser = argparse.ArgumentParser()
    parser.add_argument('model', type=str)
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--sleep', type=float, default=0)
    parser.add_argument('--scenario', type=str, default='basic')
    parser.add_argument('--n-runs', type=int, default=10)
    parser.add_argument('--use-lstm', action='store_true')
    parser.add_argument('--window-visible', action='store_true')
    parser.add_argument('--deterministic', action='store_true')
    parser.add_argument('--random', action='store_true')
    # Explicit defaults for all the boolean flags in one call.
    parser.set_defaults(window_visible=False, use_lstm=False,
                        deterministic=False, random=False)
    args = parser.parse_args()

    random_seed.set_random_seed(args.seed)

    # Probe a hidden environment once just to learn the action-space size.
    n_actions = doom_env.DoomEnv(
        window_visible=False, scenario=args.scenario).n_actions

    # With --random no model is needed; otherwise build and load one.
    if not args.random:
        model = A3CLSTM(n_actions) if args.use_lstm else A3CFF(n_actions)
        serializers.load_hdf5(args.model, model)

    env = doom_env.DoomEnv(window_visible=args.window_visible,
                           scenario=args.scenario,
                           sleep=args.sleep)
    scores = []
    for run_idx in range(args.n_runs):
        if args.random:
            score = eval_single_random_run(env)
        else:
            score = eval_single_run(
                env, model, phi, deterministic=args.deterministic)
        print('Run {}: {}'.format(run_idx, score))
        scores.append(score)
    print('Average: {}'.format(sum(scores) / args.n_runs))
コード例 #4
0
ファイル: demo_a3c_ale.py プロジェクト: nerdylinius/async-rl
def main():
    """Evaluate a saved A3C-DQN model on an ALE ROM and print per-run
    and average scores.
    """
    import logging
    logging.basicConfig(level=logging.DEBUG)

    parser = argparse.ArgumentParser()
    parser.add_argument('rom', type=str)
    parser.add_argument('model', type=str)
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--use-sdl', action='store_true')
    parser.add_argument('--n-runs', type=int, default=10)
    parser.add_argument('--deterministic', action='store_true')
    parser.set_defaults(use_sdl=False)
    parser.set_defaults(deterministic=False)
    args = parser.parse_args()

    random_seed.set_random_seed(args.seed)

    # Query the ROM once to learn the size of its action space.
    n_actions = ale.ALE(args.rom).number_of_actions

    # Load an A3C-DQN model: shared conv head + softmax policy + value head.
    # (The original also defined an unused `pv_func`; removed as dead code.)
    head = dqn_head.NIPSDQNHead()
    pi = policy.FCSoftmaxPolicy(head.n_output_channels, n_actions)
    v = v_function.FCVFunction(head.n_output_channels)
    model = chainer.ChainList(head, pi, v)
    serializers.load_hdf5(args.model, model)

    def p_func(s):
        # Policy-only forward pass used by the evaluator.
        head, pi, _ = model
        out = head(s)
        return pi(out)

    scores = []
    for i in range(args.n_runs):
        score = eval_performance(
            args.rom, p_func, deterministic=args.deterministic,
            use_sdl=args.use_sdl)
        print('Run {}: {}'.format(i, score))
        scores.append(score)
    print('Average: {}'.format(sum(scores) / args.n_runs))
コード例 #5
0
ファイル: demo_a3c_doom.py プロジェクト: BlGene/async-rl
def main():
    """Evaluate a saved A3C model (or a random policy, with --random)
    on a VizDoom scenario and print per-run and average scores.
    """
    import logging
    logging.basicConfig(level=logging.DEBUG)

    parser = argparse.ArgumentParser()
    parser.add_argument('model', type=str)
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--sleep', type=float, default=0)
    parser.add_argument('--scenario', type=str, default='basic')
    parser.add_argument('--n-runs', type=int, default=10)
    parser.add_argument('--use-lstm', action='store_true')
    parser.add_argument('--window-visible', action='store_true')
    parser.add_argument('--deterministic', action='store_true')
    parser.add_argument('--random', action='store_true')
    parser.set_defaults(window_visible=False)
    parser.set_defaults(use_lstm=False)
    parser.set_defaults(deterministic=False)
    parser.set_defaults(random=False)
    args = parser.parse_args()

    random_seed.set_random_seed(args.seed)

    # Create a throwaway hidden environment just to read the action count.
    n_actions = doom_env.DoomEnv(
        window_visible=False, scenario=args.scenario).n_actions

    # With --random no model is needed (or loaded).
    if not args.random:
        if args.use_lstm:
            model = A3CLSTM(n_actions)
        else:
            model = A3CFF(n_actions)
        serializers.load_hdf5(args.model, model)

    scores = []
    env = doom_env.DoomEnv(window_visible=args.window_visible,
                           scenario=args.scenario,
                           sleep=args.sleep)
    for i in range(args.n_runs):
        if args.random:
            score = eval_single_random_run(env)
        else:
            score = eval_single_run(
                env, model, phi, deterministic=args.deterministic)
        print('Run {}: {}'.format(i, score))
        scores.append(score)
    print('Average: {}'.format(sum(scores) / args.n_runs))
コード例 #6
0
def main():
    """Evaluate a saved A3C model on an ALE ROM, optionally recording
    each episode's screens, and print per-run and average scores.
    """
    import logging
    logging.basicConfig(level=logging.DEBUG)

    parser = argparse.ArgumentParser()
    parser.add_argument('rom', type=str)
    parser.add_argument('model', type=str)
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--use-sdl', action='store_true')
    parser.add_argument('--n-runs', type=int, default=10)
    parser.add_argument('--deterministic', action='store_true')
    parser.add_argument('--record-screen-dir', type=str, default=None)
    parser.add_argument('--use-lstm', action='store_true')
    # Explicit defaults for the boolean flags in one call.
    parser.set_defaults(use_sdl=False, use_lstm=False, deterministic=False)
    args = parser.parse_args()

    random_seed.set_random_seed(args.seed)

    n_actions = ale.ALE(args.rom).number_of_actions

    # Pick the architecture matching the saved weights, then load them.
    model = A3CLSTM(n_actions) if args.use_lstm else A3CFF(n_actions)
    serializers.load_hdf5(args.model, model)

    scores = []
    for run_idx in range(args.n_runs):
        # Optionally record each episode's screens into its own directory.
        if args.record_screen_dir is None:
            episode_record_dir = None
        else:
            episode_record_dir = os.path.join(args.record_screen_dir,
                                              str(run_idx))
            os.makedirs(episode_record_dir)
        score = eval_performance(args.rom, model,
                                 deterministic=args.deterministic,
                                 use_sdl=args.use_sdl,
                                 record_screen_dir=episode_record_dir)
        print('Run {}: {}'.format(run_idx, score))
        scores.append(score)
    print('Average: {}'.format(sum(scores) / args.n_runs))
コード例 #7
0
ファイル: demo_a3c_ale.py プロジェクト: BenjamWhite/async-rl
def main():
    """Evaluate a saved A3C model on an ALE ROM, optionally recording
    each episode's screens, and print per-run and average scores.
    """

    import logging
    logging.basicConfig(level=logging.DEBUG)

    parser = argparse.ArgumentParser()
    parser.add_argument('rom', type=str)
    parser.add_argument('model', type=str)
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--use-sdl', action='store_true')
    parser.add_argument('--n-runs', type=int, default=10)
    parser.add_argument('--deterministic', action='store_true')
    parser.add_argument('--record-screen-dir', type=str, default=None)
    parser.add_argument('--use-lstm', action='store_true')
    parser.set_defaults(use_sdl=False)
    parser.set_defaults(use_lstm=False)
    parser.set_defaults(deterministic=False)
    args = parser.parse_args()

    random_seed.set_random_seed(args.seed)

    # Query the ROM once to learn the size of its action space.
    n_actions = ale.ALE(args.rom).number_of_actions

    # Load an A3C-DQN model
    if args.use_lstm:
        model = A3CLSTM(n_actions)
    else:
        model = A3CFF(n_actions)
    serializers.load_hdf5(args.model, model)

    scores = []
    for i in range(args.n_runs):
        episode_record_dir = None
        if args.record_screen_dir is not None:
            # One sub-directory per run; os.makedirs raises if it already
            # exists, so reruns need a fresh --record-screen-dir.
            episode_record_dir = os.path.join(args.record_screen_dir, str(i))
            os.makedirs(episode_record_dir)
        score = eval_performance(args.rom,
                                 model,
                                 deterministic=args.deterministic,
                                 use_sdl=args.use_sdl,
                                 record_screen_dir=episode_record_dir)
        print('Run {}: {}'.format(i, score))
        scores.append(score)
    print('Average: {}'.format(sum(scores) / args.n_runs))
コード例 #8
0
ファイル: async.py プロジェクト: BenjamWhite/async-rl
 def set_seed_and_run(process_idx, run_func):
     # Draw a fresh random seed for this worker, apply it, then run the
     # worker function with its process index.
     seed = np.random.randint(0, 2 ** 32)
     random_seed.set_random_seed(seed)
     run_func(process_idx)
コード例 #9
0
def train(args):
    """Train a context-aware model with chainer's Trainer and early stopping.

    ``args`` carries all CLI options (paths, hyper-parameters, optimizer
    choice, GPU id, ...).  Side effects: creates ``args.out``, exports the
    run parameters, and writes logs, plots, and the best model snapshot
    there while running the full training loop.

    Raises:
        ValueError: if ``args.optimizer`` is neither "rmsprop" nor "adam".
    """
    if not os.path.exists(args.out):
        os.makedirs(args.out)
    if args.gpu >= 0:
        cuda.check_cuda_available()
        cuda.get_device(args.gpu).use()
    # NOTE(review): a seed of 0 is falsy and would be skipped here —
    # confirm whether 0 is meant to be a legal --random-seed value.
    if args.random_seed:
        set_random_seed(args.random_seed, (args.gpu,))

    # Vocabulary / dictionary setup.
    user2index = load_dict(os.path.join(args.indir, USER_DICT_FILENAME))
    item2index = load_dict(os.path.join(args.indir, ITEM_DICT_FILENAME))
    (trimmed_word2count, word2index, aspect2index, opinion2index) = read_and_trim_vocab(
        args.indir, args.trimfreq
    )
    aspect_opinions = get_aspect_opinions(os.path.join(args.indir, TRAIN_FILENAME))

    # Persist the resolved configuration alongside the run output.
    export_params(
        args,
        user2index,
        item2index,
        trimmed_word2count,
        word2index,
        aspect2index,
        opinion2index,
        aspect_opinions,
    )

    src_aspect_score = SOURCE_ASPECT_SCORE.get(args.context, "aspect_score_efm")

    data_loader = DataLoader(
        args.indir,
        user2index,
        item2index,
        trimmed_word2count,
        word2index,
        aspect2index,
        opinion2index,
        aspect_opinions,
        src_aspect_score,
    )

    train_iter, val_iter = get_dataset_iterator(
        args.context, data_loader, args.batchsize
    )

    model = get_context_model(args, data_loader)

    if args.optimizer == "rmsprop":
        optimizer = O.RMSprop(lr=args.learning_rate, alpha=args.alpha)
    elif args.optimizer == "adam":
        optimizer = O.Adam(amsgrad=args.amsgrad)
    else:
        # Fail fast instead of hitting a NameError on `optimizer` below.
        raise ValueError("unsupported optimizer: {}".format(args.optimizer))

    optimizer.setup(model)
    if args.grad_clip:
        optimizer.add_hook(GradientClipping(args.grad_clip))
    if args.gpu >= 0:
        model.to_gpu(args.gpu)

    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, converter=convert, device=args.gpu
    )
    # Stop early when validation loss plateaus, capped at args.epoch epochs.
    early_stop = triggers.EarlyStoppingTrigger(
        monitor="validation/main/loss",
        patients=args.patients,
        max_trigger=(args.epoch, "epoch"),
    )
    trainer = training.Trainer(updater, stop_trigger=early_stop, out=args.out)
    trainer.extend(
        extensions.Evaluator(val_iter, model, converter=convert, device=args.gpu)
    )
    trainer.extend(extensions.LogReport())
    trainer.extend(
        extensions.PrintReport(
            ["epoch", "main/loss", "validation/main/loss", "lr", "elapsed_time"]
        )
    )
    trainer.extend(
        extensions.PlotReport(
            ["main/loss", "validation/main/loss"], x_key="epoch", file_name="loss.png"
        )
    )
    trainer.extend(extensions.ProgressBar())
    # Snapshot only the best model according to validation loss.
    trainer.extend(
        extensions.snapshot_object(model, MODEL_FILENAME),
        trigger=triggers.MinValueTrigger("validation/main/loss"),
    )
    trainer.extend(extensions.observe_lr())

    if args.optimizer == "rmsprop":
        if args.schedule_lr:
            # Step-decay schedule: lr * lr_reduce**k every `stepsize` epochs
            # (offset by begin_step), clipped below at min_learning_rate.
            epoch_list = np.arange(
                1, int(args.epoch / args.stepsize) + 1, dtype=np.int32
            )
            value_list = args.learning_rate * args.lr_reduce ** epoch_list
            value_list[value_list < args.min_learning_rate] = args.min_learning_rate
            epoch_list *= args.stepsize
            epoch_list += args.begin_step
            trainer.extend(
                schedule_optimizer_value(epoch_list.tolist(), value_list.tolist())
            )

    trainer.run()