Example #1
    def __init__(self, name, action_space, cfg, shared_objects={}):
        self.name = name
        self.pidx = 0
        self.actions_no = len(action_space)
        self.target_update_freq = 400
        self.batch_size = batch_size = cfg.general.batch_size
        self.update_freq = batch_size
        self.hist_len = 1
        self.cuda = cfg.general.use_cuda
        self.torch_obs = True
        self.dtype = TorchTypes(self.cuda)
        self.clip_grad = 0.25

        self.hidden_size = 256
        self.max_reward = 0
        self.net = net = Policy(cfg, self.hidden_size)
        self.target = get_model(cfg.model.name)(cfg.model)

        # move both networks to the GPU when CUDA is enabled
        if self.cuda:
            self.net.cuda()
            self.target.cuda()

        self.exploration_strategy = get_batch_schedule(cfg.agent.exploration,
                                                       batch_size)
        self.batch_games = 0
        self.step_cnt = 0

        self.saved_transitions = []

        self._o, self._a, self._v = None, None, None

        # one index per environment in the batch (0 .. batch_size - 1)
        self.live_idx = torch.linspace(0, batch_size-1, batch_size) \
                             .type(self.dtype.LongTensor)

        self.lr = .001
        self.gamma = .99

        self.optimizer = optim.Adam(self.net.parameters(), lr=self.lr)
        self.optimizer.zero_grad()

        self.hidden_state = self.net.init_hidden(self.batch_size)

        # observation channels fed to the network; indices 3, 8 and 13 are skipped
        self.obs_select = torch.LongTensor(
            [0, 1, 2, 4, 5, 6, 7, 9, 10, 11, 12, 14, 15, 16,
             17]).type(self.dtype.LongTensor)
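
As an aside, a channel subset like obs_select above is typically applied with
index_select. A minimal, self-contained sketch (the observation shape here is
an assumption, not something taken from the example):

import torch

obs = torch.randn(8, 18, 10, 10)  # assumed (batch, channels, h, w) layout
obs_select = torch.tensor([0, 1, 2, 4, 5, 6, 7, 9, 10, 11, 12, 14, 15, 16, 17])
selected = obs.index_select(1, obs_select)  # keep 15 of the 18 channels
print(selected.shape)  # torch.Size([8, 15, 10, 10])
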
Example #2
    def __init__(self, name, action_space, cfg, shared_objects={}):
        super(BetaDQNBatchAgent, self).__init__()

        self.name = name
        self.pidx = 0
        self.actions_no = len(action_space)
        self.target_update_freq = cfg.training.target_update
        self.batch_size = batch_size = cfg.general.batch_size
        self.update_freq = batch_size
        self.hist_len = cfg.model.hist_len
        self.cuda = cfg.general.use_cuda
        self.torch_obs = True
        self.dtype = TorchTypes(self.cuda)

        if "model" in shared_objects:
            self.net = net = shared_objects["model"]
            self.print_info("Training some shared model.")
        else:
            self.net = net = get_model(cfg.model.name)(cfg.model)
            if self.cuda:
                net.cuda()

        self.target = get_model(cfg.model.name)(cfg.model)
        if self.cuda:
            self.net.cuda()
            self.target.cuda()

        self.behaviour = DeterministicPolicy(self.net)
        self.exploration_strategy = get_batch_schedule(cfg.agent.exploration,
                                                       batch_size)
        self.ddqn = cfg.agent.ddqn
        self.algorithm = DQNPolicyImprovement(self.net, self.target, cfg,
                                              self.ddqn)
        self.batch = []
        self.history = []

        self.batch_games = 0
        self.step_cnt = 0
        self._o = None
        self._a = None
        self.live_idx = torch.linspace(0, batch_size-1, batch_size) \
                             .type(self.dtype.LongTensor)
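
The ddqn flag above presumably switches DQNPolicyImprovement between vanilla
and Double DQN targets. As a refresher, a minimal sketch of the two target
computations in generic PyTorch (not the repository's implementation):

import torch

def dqn_targets(q_next_target, q_next_online, rewards, gamma, ddqn):
    # q_next_*: (batch, actions) Q-values; rewards: (batch,)
    if ddqn:
        # Double DQN: the online net picks the action, the target net scores it
        a = q_next_online.argmax(dim=1, keepdim=True)
        next_q = q_next_target.gather(1, a).squeeze(1)
    else:
        # vanilla DQN: max over the target net's own Q-values
        next_q = q_next_target.max(dim=1).values
    return rewards + gamma * next_q
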
Example #3
    def __init__(self, name, action_space, cfg, shared_objects={}):
        super(Village_ActorCritic, self).__init__()

        self.name = name
        self.pidx = 0
        self.actions_no = len(action_space)
        self.target_update_freq = 400
        self.batch_size = batch_size = cfg.general.batch_size
        self.update_freq = batch_size
        self.hist_len = 1
        self.cuda = cfg.general.use_cuda
        self.torch_obs = True
        self.dtype = TorchTypes(self.cuda)

        #TODO move to
        self.net = net = Policy(cfg)
        self.target = get_model(cfg.model.name)(cfg.model)

        # move both networks to the GPU when CUDA is enabled
        if self.cuda:
            self.net.cuda()
            self.target.cuda()

        self.exploration_strategy = get_batch_schedule(cfg.agent.exploration,
                                                       batch_size)
        self.batch_games = 0
        self.step_cnt = 0

        self.saved_transitions = []

        self._o, self._a, self._v = None, None, None

        self.live_idx = torch.linspace(0, batch_size-1, batch_size) \
                             .type(self.dtype.LongTensor)

        self.lr = .001
        self.gamma = cfg.agent.gamma

        self.optimizer = optim.Adam(self.net.parameters(), lr=self.lr)
        self.optimizer.zero_grad()
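
Example #3 stores transitions and a discount factor gamma for an actor-critic
update. For reference, a minimal sketch of the standard discounted-return
recursion such agents rely on (plain Python, not code from this repository):

def discounted_returns(rewards, gamma=0.99):
    # backward recursion: R_t = r_t + gamma * R_{t+1}
    returns, R = [], 0.0
    for r in reversed(rewards):
        R = r + gamma * R
        returns.append(R)
    return list(reversed(returns))

print(discounted_returns([0.0, 0.0, 1.0]))  # [0.9801, 0.99, 1.0]
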
Example #4
    def __init__(self, name, action_space, cfg, shared_objects={}):
        super(EpisodicRDQNAgent, self).__init__()

        self.name = name
        self.actions_no = len(action_space)
        self.batch_size = batch_size = cfg.general.batch_size
        self.cuda = cfg.general.use_cuda
        self.dtype = TorchTypes(self.cuda)

        if "model" in shared_objects:
            self.net = net = shared_objects["model"]
            self.print_info("Training some shared model.")
        else:
            self.net = net = get_model(cfg.model.name)(cfg.model)
            if self.cuda:
                net.cuda()

        Optimizer = getattr(optim, cfg.training.algorithm)
        optim_args = vars(cfg.training.algorithm_args)
        self.optimizer = Optimizer(net.parameters(), **optim_args)

        self.losses = []

        self.exploration_strategy = get_batch_schedule(cfg.agent.exploration,
                                                       batch_size)

        self.last_q = None
        self.last_a = None
        self.last_aux = None

        self.live_idx = torch.linspace(0, batch_size-1, batch_size) \
                             .type(self.dtype.LongTensor)
        self.prev_state = {}
        self.crt_step = -1

        # map each auxiliary task name to its loss coefficient
        self.loss_coeff = {k: v for [k, v] in cfg.model.auxiliary_tasks}
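
The getattr(optim, ...) pattern in Example #4 resolves the optimiser class by
name from the config. A standalone sketch of the same pattern, with made-up
stand-ins for the config values:

import torch.nn as nn
import torch.optim as optim

net = nn.Linear(4, 2)
algorithm, algorithm_args = "Adam", {"lr": 1e-3}  # stand-ins for cfg.training.*
Optimizer = getattr(optim, algorithm)
optimizer = Optimizer(net.parameters(), **algorithm_args)
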
Example #5
import torch
from methods import get_batch_schedule

# Each entry appears to be [schedule type, fraction of the batch, parameters]:
# 20% of the 20 environments get a constant epsilon of 0.5, 20% a constant 0.1,
# and 60% a linear anneal (presumably from 1.0 to 0.005 over 100 steps).
partition = [["constant", 0.2, [.5]], ["constant", 0.2, [0.1]],
             ["linear", 0.6, [1.0, 0.005, 100]]]

sch = get_batch_schedule(partition, 20)

for x in sch:
    print(x)
    # sample a 0/1 mask from the probabilities in x and print the
    # indices of the environments that came up 1
    print(torch.bernoulli(x).long().nonzero().squeeze(1))
    input("Push!")