# Example 1 (original scraper header: "Beispiel #1"; the stray "0" on the
# next line was a vote-count artifact). Commented out to keep the file
# importable as Python.
def run():
    """Restore a trained agent from ``args.checkpoint`` and print its reward.

    Reads CLI options from the module-level ``parser``, builds the
    environment/model pair, restores the checkpoint into a fresh session,
    seeds NumPy, evaluates for ``args.n_play`` episodes and prints the
    resulting reward.

    Raises:
        ValueError: if ``args.alg`` is not one of 'A3C', 'Q', 'VPN'.
    """
    args = parser.parse_args()
    args.task = 0
    # Filter hyper-parameters arrive as strings (e.g. "32,64"); convert
    # them to numeric form before handing them to the model constructor.
    args.f_num = util.parse_to_num(args.f_num)
    args.f_stride = util.parse_to_num(args.f_stride)
    args.f_size = util.parse_to_num(args.f_size)
    args.branch = util.parse_to_num(args.branch)

    env = new_env(args)
    # Meta dimension only exists for environments that expose meta().
    args.meta_dim = 0 if not hasattr(env, 'meta') else len(env.meta())
    device = '/gpu:0' if args.gpu > 0 else '/cpu:0'
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.2)
    config = tf.ConfigProto(device_filters=device,
                            gpu_options=gpu_options,
                            allow_soft_placement=True)
    with tf.Session(config=config) as sess:
        # Dispatch table instead of an if/elif chain.
        model_type = {'A3C': 'policy', 'Q': 'q', 'VPN': 'vpn'}.get(args.alg)
        if model_type is None:
            raise ValueError('Invalid algorithm: ' + args.alg)
        with tf.device(device):
            with tf.variable_scope("local/learner"):
                # getattr instead of eval: same attribute lookup on the
                # `model` module, without executing arbitrary code taken
                # from the --model command-line argument.
                agent = getattr(model, args.model)(
                    env.observation_space.shape,
                    env.action_space.n,
                    type=model_type,
                    gamma=args.gamma,
                    dim=args.dim,
                    f_num=args.f_num,
                    f_stride=args.f_stride,
                    f_size=args.f_size,
                    f_pad=args.f_pad,
                    branch=args.branch,
                    meta_dim=args.meta_dim)
                print("Num parameters: %d" % agent.num_param)

            saver = tf.train.Saver()
            saver.restore(sess, args.checkpoint)
        np.random.seed(args.seed)
        reward = evaluate(env, agent, args.n_play, eps=args.eps)
        print("Reward: %.2f" % (reward))
def run():
    """Restore a VPN agent from ``args.checkpoint`` and evaluate it.

    NOTE(review): this redefines ``run`` and shadows the earlier variant in
    this file; only this definition is visible to callers after import.
    Unlike the earlier variant it hard-codes ``model_type = 'vpn'`` and
    ``meta_dim = 0`` and discards the evaluation reward.
    """
    args = parser.parse_args()
    args.task = 0
    # Filter hyper-parameters arrive as strings; convert to numeric form.
    args.f_num = util.parse_to_num(args.f_num)
    args.f_stride = util.parse_to_num(args.f_stride)
    args.f_size = util.parse_to_num(args.f_size)
    args.branch = util.parse_to_num(args.branch)

    env = new_env(args)
    args.meta_dim = 0
    device = '/gpu:0' if args.gpu > 0 else '/cpu:0'
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.2)
    config = tf.ConfigProto(device_filters=device,
                            gpu_options=gpu_options,
                            allow_soft_placement=True)
    with tf.Session(config=config) as sess:
        model_type = 'vpn'
        with tf.device(device):
            with tf.variable_scope("local/learner"):
                # getattr instead of eval: same attribute lookup on the
                # `model` module, without executing arbitrary code taken
                # from the --model command-line argument.
                agent = getattr(model, args.model)(
                    env.observation_space.shape,
                    env.action_space.n,
                    type=model_type,
                    gamma=args.gamma,
                    dim=args.dim,
                    f_num=args.f_num,
                    f_stride=args.f_stride,
                    f_size=args.f_size,
                    f_pad=args.f_pad,
                    branch=args.branch,
                    meta_dim=args.meta_dim)
                print("Num parameters: %d" % agent.num_param)

            saver = tf.train.Saver()
            saver.restore(sess, args.checkpoint)
        np.random.seed(args.seed)
        evaluate(env, agent, args.n_play, eps=args.eps)
# Example 3 (original scraper header: "Beispiel #3"; the "0" was a
# vote-count artifact — note the "#2" header for the preceding example was
# missing in the scrape). Commented out to keep the file importable.
def run(envs=None):
    """Evaluate every (checkpoint, branch-config) combination and collect stats.

    Args:
        envs: optional list of environments to evaluate on. When ``None``
            or empty, a single environment built from the CLI args is used.

    Returns:
        list: concatenated per-run statistics from ``evaluate``.

    Raises:
        ValueError: if ``args.alg`` is not one of 'A3C', 'Q', 'VPN'.
    """
    stats = []

    args = parser.parse_args()
    args.task = 0
    # Filter hyper-parameters arrive as strings; convert to numeric form.
    args.f_num = util.parse_to_num(args.f_num)
    args.f_stride = util.parse_to_num(args.f_stride)
    args.f_size = util.parse_to_num(args.f_size)
    args.branch = util.parse_to_num(args.branch)

    env = new_env(args)
    if not envs:  # covers both None and an empty list
        envs = [env]

    # NOTE(review): unlike the other run() variants this assumes env.meta()
    # exists (and may return None) — confirm for all environment types.
    args.meta_dim = 0 if env.meta() is None else len(env.meta())
    device = '/gpu:0' if args.gpu > 0 else '/cpu:0'
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.2)
    branches = [
        [4, 4, 4],
        [4, 4, 4, 4],
        [4, 4, 4, 4, 4],
        [4, 1, 4, 1, 4],
        [1],
        [1, 1, 1],
        [1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 1, 1],
        [4, 4, 4, 1],
        [4, 4, 4, 1, 1],
        [4, 4, 4, 1, 1, 1],
        [1, 4, 4, 4],
        [1, 1, 1, 4, 4, 4],
        # NOTE(review): identical to the previous entry — duplicated on
        # purpose (e.g. repeated runs), or a copy-paste slip? Kept as-is.
        [1, 1, 1, 4, 4, 4],
    ]
    paths = [
        f'/home/ikaynov/Repositories/value-prediction-network/Experiments/{x}/best'
        for x in [
            's10_t20_g8_444',
            's10_t20_g8_4444',
            's10_t20_g8_44444',
        ]
    ]
    count = 0
    count_max = len(paths) * len(branches)
    for ck in paths:
        for branch_type in branches:
            print(f'Executing {count}/{count_max}.')
            config = tf.ConfigProto(device_filters=device,
                                    gpu_options=gpu_options,
                                    allow_soft_placement=True)
            # Fresh graph per combination so variable names do not collide
            # across restored checkpoints.
            tf.reset_default_graph()
            with tf.Session(config=config) as sess:
                model_type = {'A3C': 'policy',
                              'Q': 'q',
                              'VPN': 'vpn'}.get(args.alg)
                if model_type is None:
                    raise ValueError('Invalid algorithm: ' + args.alg)
                with tf.device(device):
                    with tf.variable_scope("local/learner"):
                        # getattr instead of eval: same attribute lookup on
                        # the `model` module, without executing arbitrary
                        # code from the --model command-line argument.
                        agent = getattr(model, args.model)(
                            env.observation_space.shape,
                            env.action_space.n,
                            type=model_type,
                            gamma=args.gamma,
                            dim=args.dim,
                            f_num=args.f_num,
                            f_stride=args.f_stride,
                            f_size=args.f_size,
                            f_pad=args.f_pad,
                            branch=branch_type,
                            meta_dim=args.meta_dim)
                        # Record the branch configuration the checkpoint was
                        # trained with (encoded in its directory name, e.g.
                        # "..._444" -> "[4, 4, 4]").
                        agent.train_branch = str([
                            int(x)
                            for x in list(ck.split('/')[-2].split('_')[-1])
                        ])
                        print("Num parameters: %d" % agent.num_param)
                    # Initialize *after* the graph is built — the original
                    # ran the initializer on a still-empty graph, where it
                    # initialized nothing. restore() then overwrites every
                    # checkpointed variable.
                    sess.run(tf.global_variables_initializer())
                    saver = tf.train.Saver()
                    saver.restore(sess, ck)

                    # Renamed from `env` so the outer environment used to
                    # size the network is not clobbered for the next
                    # checkpoint/branch combination.
                    for eval_env in envs:
                        run_stats = evaluate(eval_env,
                                             agent,
                                             args.n_play,
                                             eps=args.eps)
                        stats += run_stats
            count += 1
    return stats