Example #1
# imports assumed by this snippet (the example page strips the module header)
import numpy as np
import torch

# set up the environment
env = MultiObjectiveEnv(args.env_name)

# pick the MetaAgent implementation matching the chosen method
if args.method == 'crl-naive':
    from crl.naive.meta import MetaAgent
elif args.method == 'crl-envelope':
    from crl.envelope.meta import MetaAgent
elif args.method == 'crl-energy':
    from crl.energy.meta import MetaAgent
else:
    raise ValueError("unknown method: {}".format(args.method))

# load a trained model and wrap it in an evaluation-mode agent for plotting
model = torch.load("{}{}.pkl".format(
    args.save, "m.{}_e.{}_n.{}".format(args.model, args.env_name,
                                       args.name)))
agent = MetaAgent(model, args, is_train=False)

# containers for the optimal (opt), predicted (q) and actual (act) solutions
opt_x = []
opt_y = []
q_x = []
q_y = []
act_x = []
act_y = []
# dis_treasure and dis_time are defined earlier in the original module
real_sol = np.stack((dis_treasure, dis_time))

policy_loss = np.inf
predict_loss = np.inf

# sample random 2-D preference weights and evaluate the agent under each
for i in range(2000):
    w = np.random.randn(2)
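All three examples read their configuration from an `args` namespace that is built outside these excerpts. Below is a minimal argparse sketch covering only the fields the snippets reference; the flag spellings and default values are assumptions for illustration, not the project's actual CLI:

import argparse

parser = argparse.ArgumentParser()
# only the fields used by the snippets on this page; defaults are hypothetical
parser.add_argument('--method', default='crl-envelope',
                    choices=['crl-naive', 'crl-envelope', 'crl-energy'])
parser.add_argument('--env-name', dest='env_name', default='dst')
parser.add_argument('--model', default='linear')
parser.add_argument('--name', default='0')
parser.add_argument('--save', default='saved/')
parser.add_argument('--serialize', action='store_true')
args = parser.parse_args()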
Example #2
# imports assumed by this snippet (the example page strips the module header)
import torch

# args.episode_num = 600
# set up the environment
env = MultiObjectiveEnv(args.env_name)

# derive state / action / reward sizes from the environment specs
state_size = len(env.state_spec)
action_size = env.action_spec[2][1] - env.action_spec[2][0]
reward_size = len(env.reward_spec)

# pick the MetaAgent and model factory matching the chosen method
if args.method == 'crl-naive':
    from crl.naive.meta import MetaAgent
    from crl.naive.models import get_new_model
elif args.method == 'crl-envelope':
    from crl.envelope.meta import MetaAgent
    from crl.envelope.models import get_new_model
elif args.method == 'crl-energy':
    from crl.energy.meta import MetaAgent
    from crl.energy.models import get_new_model
else:
    raise ValueError("unknown method: {}".format(args.method))

# either resume from a serialized checkpoint or build a fresh model,
# then wrap it in a training-mode agent
if args.serialize:
    model = torch.load("{}{}.pkl".format(
        args.save, "m.{}_e.{}_n.{}".format(args.model, args.env_name,
                                           args.name)))
else:
    model = get_new_model(args.model, state_size, action_size, reward_size)
agent = MetaAgent(model, args, is_train=True)

train(env, agent, args)
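Both branches above use the same checkpoint naming convention as Example #1. A quick illustration of the resulting path, with hypothetical argument values:

# hypothetical values, for illustration only
save, model_name, env_name, run_name = 'saved/', 'linear', 'dst', '0'
path = "{}{}.pkl".format(
    save, "m.{}_e.{}_n.{}".format(model_name, env_name, run_name))
print(path)  # -> saved/m.linear_e.dst_n.0.pkl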
Example #3
# imports assumed by this snippet (the example page strips the module header)
import numpy as np
import torch

# set up the environment
env = MultiObjectiveEnv(args.env_name)

# pick the MetaAgent implementation matching the chosen method
if args.method == 'crl-naive':
    from crl.naive.meta import MetaAgent
elif args.method == 'crl-envelope':
    from crl.envelope.meta import MetaAgent
elif args.method == 'crl-energy':
    from crl.energy.meta import MetaAgent
else:
    raise ValueError("unknown method: {}".format(args.method))

# load a trained model and wrap it in an evaluation-mode agent for plotting
model = torch.load("{}{}.pkl".format(
    args.save, "m.{}_e.{}_n.{}".format(args.model, args.env_name, args.name)))
agent = MetaAgent(model, args, is_train=False)

REPEAT = 1

# the unknown preference the experiment tries to recover
unknown_w = np.array([0.0, 0.0, 0.0, 0.0, 1.0, 0.0])

bbbest_param = []
for iii in range(5):

    # start from a uniform preference estimate over the six objectives
    pref_param = np.array(
        [1.0 / 6.0, 1.0 / 6.0, 1.0 / 6.0, 1.0 / 6.0, 1.0 / 6.0, 1.0 / 6.0])
    explore_w = generate_w(1, pref_param)
    max_target = 0

    sample_episode = 0
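generate_w is not shown on this page. A plausible minimal sketch, assuming it samples preference vectors from a Dirichlet distribution concentrated around pref_param; the project's real implementation may differ:

import numpy as np

def generate_w(num_prefs, pref_param, concentration=10.0):
    # assumption: weights are drawn on the probability simplex from a
    # Dirichlet centred on pref_param; this is an illustrative sketch,
    # not the project's actual generate_w
    alpha = concentration * np.asarray(pref_param)
    return np.random.dirichlet(alpha, size=num_prefs)

# e.g. one 6-dimensional preference vector summing to 1:
# w = generate_w(1, np.full(6, 1.0 / 6.0))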