Example 1
    def __init__(self, args):
        vis = visdom.Visdom()
        assert vis.check_connection()
        FloatTensor = torch.cuda.FloatTensor
        LongTensor = torch.cuda.LongTensor
        ByteTensor = torch.cuda.ByteTensor
        Tensor = FloatTensor
        args = parser.parse_args()
        # setup the environment
        self.env = MultiObjectiveEnv(args.env_name)
        # get state / action / reward sizes
        state_size = len(self.env.state_spec)
        action_size = self.env.action_spec[2][1] - self.env.action_spec[2][0]
        reward_size = len(self.env.reward_spec)


        from crl.envelope.meta import MetaAgent
        from crl.envelope.models import get_new_model

        model = get_new_model(args.model, state_size, action_size, reward_size)
        agent = MetaAgent(model, args, is_train=True)

        state_size = len(self.env.state_spec)
        action_size = self.env.action_spec[2][1] - self.env.action_spec[2][0]
        reward_size = len(self.env.reward_spec)

        model = get_new_model(args.model, state_size, action_size, reward_size)
        dicts = torch.load("{}{}.pth.tar".format(args.save,
                                                 "m.{}_e.{}_n.{}".format(args.model, args.env_name, args.name)))
        model.load_state_dict(dicts['state_dict'])
        self.agent = MetaAgent(model, args, is_train=False)
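The constructor above expects a checkpoint named m.<model>_e.<env>_n.<name>.pth.tar under args.save, containing a dict with a 'state_dict' entry. A minimal sketch of writing a compatible file is shown below; the function name and arguments are placeholders, not part of the original code.

import torch

def save_checkpoint(model, save_dir, model_name, env_name, run_name):
    # Mirror the naming scheme used by the loader: {save_dir}m.{model}_e.{env}_n.{name}.pth.tar
    path = "{}{}.pth.tar".format(save_dir,
                                 "m.{}_e.{}_n.{}".format(model_name, env_name, run_name))
    # The loader only reads the 'state_dict' key, so that is all we store here.
    torch.save({'state_dict': model.state_dict()}, path)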
Example 2
class Paint_Booth():
    def __init__(self, args):
        vis = visdom.Visdom()
        assert vis.check_connection()
        FloatTensor = torch.cuda.FloatTensor
        LongTensor = torch.cuda.LongTensor
        ByteTensor = torch.cuda.ByteTensor
        Tensor = FloatTensor
        # re-parse CLI flags from the module-level argparse parser (overrides the args passed in)
        args = parser.parse_args()
        # setup the environment
        self.env = MultiObjectiveEnv(args.env_name)
        # get state / action / reward sizes
        state_size = len(self.env.state_spec)
        action_size = self.env.action_spec[2][1] - self.env.action_spec[2][0]
        reward_size = len(self.env.reward_spec)


        from crl.envelope.meta import MetaAgent
        from crl.envelope.models import get_new_model

        model = get_new_model(args.model, state_size, action_size, reward_size)
        agent = MetaAgent(model, args, is_train=True)

        state_size = len(self.env.state_spec)
        action_size = self.env.action_spec[2][1] - self.env.action_spec[2][0]
        reward_size = len(self.env.reward_spec)

        # rebuild the model, reload the trained weights from the saved
        # checkpoint, and wrap them in an evaluation-mode agent
        model = get_new_model(args.model, state_size, action_size, reward_size)
        dicts = torch.load("{}{}.pth.tar".format(args.save,
                                                 "m.{}_e.{}_n.{}".format(args.model, args.env_name, args.name)))
        model.load_state_dict(dicts['state_dict'])
        self.agent = MetaAgent(model, args, is_train=False)

    def run_one_episode(self, w):
        # normalize the preference: w lies on the L1 simplex, w_e is the unit L2 direction
        w = np.abs(w) / np.linalg.norm(w, ord=1)
        w_e = w / np.linalg.norm(w, ord=2)
        ttrw = np.array([0.0, 0.0])
        terminal = False
        self.env.reset()
        cnt = 0
        while not terminal:
            state = self.env.observe()
            mask = self.env.env.get_action_out_mask()
            # query the agent for a masked action under the current preference
            action = self.agent.act(state, preference=torch.from_numpy(w).type(torch.cuda.FloatTensor), mask=mask)
            next_state, reward, terminal = self.env.step(action)
            reward[0] = 1 - reward[0]
            reward[1] = self.env.env.get_distortion(absolute=True, tollerance=15) / 5
            if cnt > 300:
                terminal = True
            ttrw = ttrw + reward  # * np.power(args.gamma, cnt)
            cnt += 1
        # project the accumulated return onto the preference direction
        ttrw_w = w.dot(ttrw) * w_e
        return ttrw_w
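run_one_episode normalizes whatever preference vector it is given, so it can be driven with a simple sweep of weights. A minimal usage sketch, assuming the module-level argparse args referenced throughout these examples is available; the weight grid is only illustrative.

import numpy as np

# Build the evaluation wrapper; assumes the module-level argparse `args` exists.
booth = Paint_Booth(args)

returns = []
for w0 in np.linspace(0.0, 1.0, 11):
    w = np.array([w0, 1.0 - w0])  # preference over the two reward components
    returns.append(booth.run_one_episode(w))
# `returns` holds one preference-projected return vector per weight setting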
Example 3
            act_1,
            act_2,
            #    q__max,
            loss / cnt)
    if (num_eps + 1) % 500 == 0:
        agent.save(
            args.save, "m.{}_e.{}_n.{}".format(args.model, args.env_name,
                                               args.name))
    # agent.save(args.save, "m.{}_e.{}_n.{}".format(args.model, args.env_name, args.name))


if __name__ == '__main__':
    args = parser.parse_args()
    # args.episode_num = 600
    # setup the environment
    env = MultiObjectiveEnv(args.env_name)

    # get state / action / reward sizes
    state_size = len(env.state_spec)
    action_size = env.action_spec[2][1] - env.action_spec[2][0]
    reward_size = len(env.reward_spec)

    # generate an agent for initial training
    agent = None
    if args.method == 'crl-naive':
        from crl.naive.meta import MetaAgent
        from crl.naive.models import get_new_model
    elif args.method == 'crl-envelope':
        from crl.envelope.meta import MetaAgent
        from crl.envelope.models import get_new_model
    elif args.method == 'crl-energy':
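Every snippet on this page reads a module-level argparse parser that is not shown. The sketch below lists only the attributes the snippets actually use (env_name, method, model, name, save, gamma, pltcontrol); the flag spellings, defaults, and help texts are assumptions, not the original definitions.

import argparse

# Sketch of the module-level parser assumed above; only the attribute names come from the code.
parser = argparse.ArgumentParser(description='multi-objective RL examples')
parser.add_argument('--env_name', default='ft', help='name of the MultiObjectiveEnv')
parser.add_argument('--method', default='crl-envelope', help='crl-naive | crl-envelope | crl-energy')
parser.add_argument('--model', default='linear', help='architecture passed to get_new_model')
parser.add_argument('--name', default='', help='tag appended to saved checkpoints')
parser.add_argument('--save', default='saved/', help='directory prefix for checkpoints')
parser.add_argument('--gamma', type=float, default=0.99, help='discount factor')
parser.add_argument('--pltcontrol', action='store_true', help='plot the control frontier')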
Example 4
                if np.linalg.norm(a - b, ord=1) < 0.20 * np.linalg.norm(b):
                    cnt += 1.0
                    break
            elif base == 2:
                if np.linalg.norm(a - b, ord=1) < 0.3:
                    cnt += 1.0
                    break
    return cnt / len(A)


################# Control Frontier #################

if args.pltcontrol:

    # setup the environment
    env = MultiObjectiveEnv(args.env_name)

    # generate an agent for plotting
    agent = None
    if args.method == 'crl-naive':
        from crl.naive.meta import MetaAgent
    elif args.method == 'crl-envelope':
        from crl.envelope.meta import MetaAgent
    elif args.method == 'crl-energy':
        from crl.energy.meta import MetaAgent
    model = torch.load("{}{}.pkl".format(
        args.save, "m.{}_e.{}_n.{}".format(args.model, args.env_name,
                                           args.name)))
    agent = MetaAgent(model, args, is_train=False)

    # compute opt
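The truncated loop at the top of this example counts, for each point of A, whether some point of B lies within a tolerance: a relative 20% L1 distance when base is 1, an absolute 0.3 L1 distance when base is 2, and returns the matched fraction. A self-contained reconstruction under those assumptions follows; the function name and the outer loop over B are not from the original.

import numpy as np

def matched_fraction(A, B, base=1):
    # Fraction of points in A that have a "close" counterpart in B.
    cnt = 0.0
    for a in A:
        for b in B:
            if base == 1:
                # relative tolerance: within 20% of |b| in L1 distance
                if np.linalg.norm(a - b, ord=1) < 0.20 * np.linalg.norm(b):
                    cnt += 1.0
                    break
            elif base == 2:
                # absolute tolerance in L1 distance
                if np.linalg.norm(a - b, ord=1) < 0.3:
                    cnt += 1.0
                    break
    return cnt / len(A)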