Example #1
import os

import torch
import torch.optim as optim

# DQN, ReplayMemory, env, device and LR are assumed to be defined elsewhere
# in the project.

test_time = False

n_steps = 8
n_actions = env.action_space.n
img_height = 64
img_width = 64
network_path = "target_net.pt"
if os.path.exists(network_path):
    policy_net = torch.load(network_path).to(device)
    print("successfully loaded existing network from file: " + network_path)
else:
    policy_net = DQN(img_height, img_width, n_actions).to(device)
target_net = DQN(img_height, img_width, n_actions).to(device)
target_net.load_state_dict(policy_net.state_dict())
target_net.eval()
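# target_net stays in eval mode: its weights change only via load_state_dict
# from policy_net, never by gradient updates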
optimizer = optim.RMSprop(policy_net.parameters(), lr=LR)
memory = ReplayMemory(10000)

steps_done = 0
logfile = "train_log.txt"
with open(logfile, "w+") as f:
    f.write("CS4803 MineRL Project Logs:\n")
def append_log(s):
    with open(logfile, "a") as f:
        f.write(s + "\n")

def state_from_obs(obs):
    # get the camera image from the observation dict and convert the image
    # to the correct shape: (C, H, W)
    img = torch.tensor(obs["pov"] / 255.0, dtype=torch.float32)
    img = img.permute(2, 0, 1)  # (H, W, C) -> (C, H, W)
    return img.to(device)
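
# A minimal sketch of epsilon-greedy action selection built on the setup
# above. EPS_START, EPS_END and EPS_DECAY are hypothetical hyperparameters,
# and the exponential decay schedule is one common choice, not necessarily
# this project's.
import math
import random

EPS_START, EPS_END, EPS_DECAY = 0.9, 0.05, 2000  # assumed values

def select_action(state):
    global steps_done
    eps_threshold = EPS_END + (EPS_START - EPS_END) * \
        math.exp(-1.0 * steps_done / EPS_DECAY)
    steps_done += 1
    if random.random() > eps_threshold:
        with torch.no_grad():
            # exploit: pick the action with the highest predicted Q-value
            return policy_net(state.unsqueeze(0)).max(1)[1].view(1, 1)
    # explore: uniform random action
    return torch.tensor([[random.randrange(n_actions)]],
                        device=device, dtype=torch.long)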
Example #2
import numpy as np
import torch
import torch.optim as optim

# DQN, AgentReplayMemory and device are assumed to be defined elsewhere in
# the project.

class Agent:
    def __init__(
        self,
        state_size,
        action_size,
        n_agents,
        buffer_size: int = int(1e5),
        batch_size: int = 256,
        gamma: float = 0.995,
        tau: float = 1e-3,
        learning_rate: float = 7e-4,
        update_every: int = 4,
    ):
        """
        Initialize a DQN agent that stores per-agent experience in a shared
        replay buffer

        Args:
            state_size (int): Size of the state observation returned by the
                environment
            action_size (int): Action space size
            n_agents (int): Number of agents in the environment
            buffer_size (int): Desired total experience buffer size
            batch_size (int): Mini-batch size
            gamma (float): Discount factor
            tau (float): For soft update of target parameters
            learning_rate (float): Learning rate
            update_every (int): Number of steps between learning updates
        """

        self.state_size = state_size
        self.action_size = action_size
        self.n_agents = n_agents

        # Q-Networks: start the target network from the policy network's weights
        self.policy_net = DQN(state_size, action_size).to(device)
        self.target_net = DQN(state_size, action_size).to(device)
        self.target_net.load_state_dict(self.policy_net.state_dict())

        self.optimizer = optim.Adam(self.policy_net.parameters(),
                                    lr=learning_rate)
        self.memory = AgentReplayMemory(buffer_size, n_agents, state_size,
                                        device)

        self.t_step = 0

        self.update_every = update_every
        self.batch_size = batch_size
        self.gamma = gamma
        self.tau = tau

    def step(self, states, actions, rewards, next_states, dones):

        self.memory.push_agent_actions(states, actions, rewards, next_states,
                                       dones)

        self.t_step = (self.t_step + 1) % self.update_every
        if self.t_step == 0:
            if self.memory.at_capacity():
                experience = self.memory.sample(self.batch_size)
                self.learn(experience, self.gamma)

    def act(self, states, eps=0):
        states = torch.from_numpy(states).float().to(device)
        self.policy_net.eval()

        with torch.no_grad():
            action_values = self.policy_net(states)
        self.policy_net.train()

        r = np.random.random(size=self.n_agents)
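        # vectorized epsilon-greedy: agents whose draw in r exceeds eps act
        # greedily, the rest take a uniform random action (np.where below)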

        action_values = np.argmax(action_values.cpu().data.numpy(), axis=1)
        random_choices = np.random.randint(0,
                                           self.action_size,
                                           size=self.n_agents)

        return np.where(r > eps, action_values, random_choices)

    def learn(self, experiences, gamma):
        states, actions, rewards, next_states, dones = experiences

        criterion = torch.nn.MSELoss()
        self.policy_net.train()
        self.target_net.eval()

        # policy_net(states) has shape (batch_size, action_size); gather
        # selects the Q-value of the action taken -> shape (batch_size, 1)
        predicted_targets = self.policy_net(states).gather(1, actions)

        with torch.no_grad():
            # greedy Q-value from the "older" target network for each next state
            labels_next = self.target_net(next_states).max(1)[0].unsqueeze(1)

        # TD target: r + gamma * max_a' Q_target(s', a'), zeroed at terminal states
        labels = rewards + (gamma * labels_next * (1 - dones))

        loss = criterion(predicted_targets, labels)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        # ------------------- update target network ------------------- #
        self.soft_update(self.policy_net, self.target_net, self.tau)

    def soft_update(self, local_model, target_model, tau):
        """
        Soft update model parameters.

        θ_target = τ*θ_local + (1 - τ)*θ_target

        Args:
            local_model (PyTorch model): weights will be copied from
            target_model (PyTorch model): weights will be copied to
            tau (float): interpolation parameter
        """
        for target_param, local_param in zip(target_model.parameters(),
                                             local_model.parameters()):
            target_param.data.copy_(tau * local_param.data +
                                    (1 - tau) * target_param.data)
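
# A minimal usage sketch for Agent, assuming a hypothetical vectorized
# environment whose reset()/step() return per-agent numpy arrays:
#
#   agent = Agent(state_size=33, action_size=4, n_agents=20)
#   states = env.reset()
#   for t in range(1000):
#       actions = agent.act(states, eps=0.05)
#       next_states, rewards, dones = env.step(actions)
#       agent.step(states, actions, rewards, next_states, dones)
#       states = next_states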
Example #3
import os
import re

import numpy as np
import torch
import torch.nn.functional as F
import torch.optim as optim
import torchvision.transforms as T
from PIL import Image

# DQN, ReplayMemory, Transition and the default config `dcfg` are assumed
# to be defined elsewhere in the project.

class TrainNQL:
    def __init__(self, epi, cfg=dcfg, validation=False):
        # cpu or cuda
        torch.cuda.empty_cache()
        self.device = cfg.device
        self.state_dim = cfg.proc_frame_size  # state dimensionality, e.g. 84x84
        self.state_size = cfg.state_size
        self.t_eps = cfg.t_eps
        self.minibatch_size = cfg.minibatch_size
        # Q-learning parameters
        self.discount = cfg.discount  # discount factor
        self.replay_memory = cfg.replay_memory
        self.bufferSize = cfg.bufferSize
        self.target_q = cfg.target_q
        self.validation = validation
        if validation:
            self.episode = epi
        else:
            self.episode = int(epi) - 1
        self.cfg = cfg

        modelGray = 'results/ep' + str(self.episode) + '/modelGray.net'
        modelDepth = 'results/ep' + str(self.episode) + '/modelDepth.net'
        tModelGray = 'results/ep' + str(self.episode) + '/tModelGray.net'
        tModelDepth = 'results/ep' + str(self.episode) + '/tModelDepth.net'

        if all(os.path.exists(p)
               for p in (modelGray, modelDepth, tModelGray, tModelDepth)):
            print("Loading model")
            self.gray_policy_net = torch.load(modelGray).to(self.device)
            self.gray_target_net = torch.load(tModelGray).to(self.device)
            self.depth_policy_net = torch.load(modelDepth).to(self.device)
            self.depth_target_net = torch.load(tModelDepth).to(self.device)

        else:
            print("New model")

            def make_dqn():
                return DQN(noutputs=cfg.noutputs,
                           nfeats=cfg.nfeats,
                           nstates=cfg.nstates,
                           kernels=cfg.kernels,
                           strides=cfg.strides,
                           poolsize=cfg.poolsize).to(self.device)

            self.gray_policy_net = make_dqn()
            self.gray_target_net = make_dqn()
            self.depth_policy_net = make_dqn()
            self.depth_target_net = make_dqn()
            # start each target network from its policy network's weights
            self.gray_target_net.load_state_dict(
                self.gray_policy_net.state_dict())
            self.depth_target_net.load_state_dict(
                self.depth_policy_net.state_dict())

        # every `target_q` episodes, refresh the target networks by cloning
        # the current policy-network weights
        if not validation and self.target_q and self.episode % self.target_q == 0:
            print("cloning")
            self.gray_target_net.load_state_dict(
                self.gray_policy_net.state_dict())
            self.depth_target_net.load_state_dict(
                self.depth_policy_net.state_dict())

        self.gray_target_net.eval()
        self.depth_target_net.eval()

        self.gray_optimizer = optim.RMSprop(self.gray_policy_net.parameters())
        self.depth_optimizer = optim.RMSprop(
            self.depth_policy_net.parameters())
        self.memory = ReplayMemory(self.replay_memory)

    def get_tensor_from_image(self, file):
        convert = T.Compose([
            T.ToPILImage(),
            T.Resize((self.state_dim, self.state_dim),
                     interpolation=Image.BILINEAR),
            T.ToTensor()
        ])
        screen = Image.open(file)
        screen = np.ascontiguousarray(screen, dtype=np.float32) / 255
        screen = torch.from_numpy(screen)
        screen = convert(screen).unsqueeze(0).to(self.device)
        return screen
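
    # Note: the conversion above assumes single-channel (grayscale or depth)
    # PNGs, since T.ToPILImage expects an (H, W) or (C, H, W) tensor.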

    def get_data(self, episode, tsteps):
        # store file paths rather than tensors; the images are loaded lazily
        # in train() to keep memory usage low
        images = []
        depths = []
        dirname_rgb = 'dataset/RGB/ep' + str(episode)
        dirname_dep = 'dataset/Depth/ep' + str(episode)
        for step in range(tsteps):
            proc_image = []
            proc_depth = []
            for i in range(self.state_size):
                grayfile = dirname_rgb + '/image_' + str(step + 1) + '_' + str(
                    i + 1) + '.png'
                depthfile = dirname_dep + '/depth_' + str(
                    step + 1) + '_' + str(i + 1) + '.png'
                proc_image.append(grayfile)
                proc_depth.append(depthfile)
            images.append(proc_image)
            depths.append(proc_depth)
        return images, depths

    def load_data(self):

        rewards = torch.load('files/reward_history.dat')
        actions = torch.load('files/action_history.dat')
        ep_rewards = torch.load('files/ep_rewards.dat')

        print("Loading images")

        best_scores = range(len(actions))
        buffer_selection_mode = 'default'

        if buffer_selection_mode == 'success_handshake':
            eps_values = []
            for i in range(len(actions)):
                # count successful and failed handshake actions (action id 3)
                hspos = 0
                hsneg = 0
                for step in range(len(actions[i])):
                    if actions[i][step] == 3:
                        if rewards[i][step] > 0:
                            hspos = hspos + 1
                        elif rewards[i][step] == -0.1:
                            hsneg = hsneg + 1
                # guard against episodes with no handshake attempts
                total = hspos + hsneg
                accuracy = hspos / total if total > 0 else 0.0
                eps_values.append(accuracy)

            # episode indices sorted by handshake accuracy (ascending)
            best_scores = np.argsort(eps_values)

        for i in best_scores:
            print('Ep: ', i + 1)
            dirname_gray = 'dataset/RGB/ep' + str(i + 1)
            files = []
            if os.path.exists(dirname_gray):
                files = os.listdir(dirname_gray)

            # count recorded steps: each step is stored as 8 image files
            k = sum(1 for f in files if re.match(r"image.*\.png", f))
            k = int(k / 8)
            # round down to a multiple of 4 and cap at the buffer size
            while k % 4 != 0:
                k = k - 1
            k = min(k, self.bufferSize)
            print(k)

            images, depths = self.get_data(i + 1, k)
            print("Loading done")

            for step in range(k - 1):
                reward = self.cfg.neutral_reward
                if rewards[i][step] >= 1:
                    reward = self.cfg.hs_success_reward
                elif rewards[i][step] < 0:
                    reward = self.cfg.hs_fail_reward
                reward = torch.tensor([reward], device=self.device)
                action = torch.tensor([[actions[i][step]]],
                                      device=self.device,
                                      dtype=torch.long)
                image = images[step]
                depth = depths[step]
                next_image = images[step + 1]
                next_depth = depths[step + 1]
                self.memory.push(image, depth, action, next_image, next_depth,
                                 reward)
                #print("Memory size: ",getsizeof(self.memory))
                #torch.cuda.empty_cache()

    def train(self):
        if len(self.memory) < self.minibatch_size:
            return
        for i in range(0, len(self.memory), self.minibatch_size):
            transitions = self.memory.pull(self.minibatch_size)

            print('Batch train: ' + str(int(i / self.minibatch_size) + 1) +
                  "/" + str(int(len(self.memory) / self.minibatch_size) + 1))

            aux_transitions = []
            for t in transitions:
                proc_sgray = torch.Tensor(self.state_size, self.state_dim,
                                          self.state_dim).to(self.device)
                proc_sdepth = torch.Tensor(self.state_size, self.state_dim,
                                           self.state_dim).to(self.device)
                proc_next_sgray = torch.Tensor(self.state_size, self.state_dim,
                                               self.state_dim).to(self.device)
                proc_next_sdepth = torch.Tensor(self.state_size,
                                                self.state_dim,
                                                self.state_dim).to(self.device)
                count = 0
                for sgray, sdepth, next_sgray, next_sdepth in zip(
                        t.sgray, t.sdepth, t.next_sgray, t.next_sdepth):
                    proc_sgray[count] = self.get_tensor_from_image(sgray)
                    proc_sdepth[count] = self.get_tensor_from_image(sdepth)
                    proc_next_sgray[count] = self.get_tensor_from_image(
                        next_sgray)
                    proc_next_sdepth[count] = self.get_tensor_from_image(
                        next_sdepth)
                    count += 1

                proc_sgray = proc_sgray.unsqueeze(0).to(self.device)
                proc_sdepth = proc_sdepth.unsqueeze(0).to(self.device)
                proc_next_sgray = proc_next_sgray.unsqueeze(0).to(self.device)
                proc_next_sdepth = proc_next_sdepth.unsqueeze(0).to(
                    self.device)
                # Transition fields: (sgray, sdepth, action, next_sgray,
                # next_sdepth, reward)
                one_transition = Transition(proc_sgray, proc_sdepth, t.action,
                                            proc_next_sgray, proc_next_sdepth,
                                            t.reward)
                aux_transitions.append(one_transition)
            transitions = aux_transitions

            # Transpose the batch (see https://stackoverflow.com/a/19343/3343043 for
            # detailed explanation). This converts batch-array of Transitions
            # to Transition of batch-arrays.
            batch = Transition(*zip(*transitions))
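            # e.g. [T(s1, a1, ...), T(s2, a2, ...)] becomes
            # T(sgray=(s1, s2), action=(a1, a2), ...)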

            # Compute a mask of non-final states and concatenate the batch elements
            # (a final state would've been the one after which simulation ended)
            gray_non_final_mask = torch.tensor(tuple(
                map(lambda s: s is not None, batch.next_sgray)),
                                               device=self.device,
                                               dtype=torch.bool)
            gray_non_final_next_states = torch.cat(
                [s for s in batch.next_sgray if s is not None])

            depth_non_final_mask = torch.tensor(tuple(
                map(lambda s: s is not None, batch.next_sdepth)),
                                                device=self.device,
                                                dtype=torch.bool)
            depth_non_final_next_states = torch.cat(
                [s for s in batch.next_sdepth if s is not None])
            sgray_batch = torch.cat(batch.sgray)
            sdepth_batch = torch.cat(batch.sdepth)

            action_batch = torch.cat(batch.action)
            reward_batch = torch.cat(batch.reward)

            # Compute Q(s_t, a) - the model computes Q(s_t), then we select the
            # columns of actions taken. These are the actions which would've been taken
            # for each batch state according to policy_net
            sgray_action_values = self.gray_policy_net(sgray_batch).gather(
                1, action_batch)
            sdepth_action_values = self.depth_policy_net(sdepth_batch).gather(
                1, action_batch)

            # Compute V(s_{t+1}) for all next states.
            # Expected values of actions for non_final_next_states are computed based
            # on the "older" target_net; selecting their best reward with max(1)[0].
            # This is merged based on the mask, such that we'll have either the expected
            # state value or 0 in case the state was final.
            # the final batch pulled from memory may be smaller than
            # minibatch_size, so size the value tensors from the actual batch
            batch_size = sgray_batch.size(0)
            next_sgray_values = torch.zeros(batch_size, device=self.device)
            next_sgray_values[gray_non_final_mask] = self.gray_target_net(
                gray_non_final_next_states).max(1)[0].detach()

            next_sdepth_values = torch.zeros(batch_size, device=self.device)
            next_sdepth_values[depth_non_final_mask] = self.depth_target_net(
                depth_non_final_next_states).max(1)[0].detach()
            # Compute the expected Q values
            expected_sgray_action_values = (next_sgray_values *
                                            self.discount) + reward_batch
            expected_sdepth_action_values = (next_sdepth_values *
                                             self.discount) + reward_batch

            # Compute Huber loss
            gray_loss = F.smooth_l1_loss(
                sgray_action_values, expected_sgray_action_values.unsqueeze(1))
            depth_loss = F.smooth_l1_loss(
                sdepth_action_values,
                expected_sdepth_action_values.unsqueeze(1))

            # Optimize the model
            self.gray_optimizer.zero_grad()
            gray_loss.backward()
            for param in self.gray_policy_net.parameters():
                param.grad.data.clamp_(-1, 1)
            self.gray_optimizer.step()

            # Optimize the model
            self.depth_optimizer.zero_grad()
            depth_loss.backward()
            for param in self.depth_policy_net.parameters():
                param.grad.data.clamp_(-1, 1)
            self.depth_optimizer.step()
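
# A minimal usage sketch for TrainNQL (hypothetical episode number and save
# path; the constructor loads models from results/ep<episode>/):
#
#   trainer = TrainNQL(epi=2)
#   trainer.load_data()
#   trainer.train()
#   torch.save(trainer.gray_policy_net, 'results/ep2/modelGray.net')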