Ejemplo n.º 1
0
        def forward(self, x, info=None):
            """Map an input batch to the raw outputs of ``self.head``.

            Args:
                x: Input batch. Anything that is not a ``torch.FloatTensor``
                    is first converted with ``process_x``.
                info: Accepted for interface compatibility; not used here.

            Returns:
                The output of ``self.head`` (no softmax is applied).
            """
            if not isinstance(x, torch.FloatTensor):
                x = process_x(x)

            # Three identical stages: convolution -> batch norm -> ReLU.
            stages = (
                (self.conv1, self.bn1),
                (self.conv2, self.bn2),
                (self.conv3, self.bn3),
            )
            for conv, norm in stages:
                x = F.relu(norm(conv(x)))

            # Flatten everything except the leading (batch) dimension.
            flat = x.reshape(x.size(0), -1)
            hidden = F.relu(self.fc(flat))
            return self.head(hidden)
Ejemplo n.º 2
0
        def choose_action(self, x):
            """Select an action for observation ``x`` with an epsilon-greedy rule.

            When a uniform random draw is below ``EPSILON`` the action with the
            largest value predicted by ``self.eval_net`` is returned; otherwise
            a random integer in ``[0, N_A)`` is returned.

            Args:
                x: Raw observation; converted with ``process_x`` before use.

            Returns:
                The index of the chosen action.
            """
            self.eval_net.eval()

            n_actions = N_A
            state = process_x(x)

            # Exploration: the draw was not below EPSILON.
            if not (np.random.uniform() < EPSILON):
                return np.random.randint(0, n_actions)

            # Exploitation: arg-max over dim 1, then take the first (only) row.
            q_values = self.eval_net.forward(state)
            _, best = torch.max(q_values, 1)
            return best.data.numpy()[0]
Ejemplo n.º 3
0
        def choose_action(self, x):
            """Select an action for observation ``x`` with an epsilon-greedy rule.

            When a uniform random draw is below ``EPSILON`` the action with the
            largest value predicted by ``self.eval_net`` is returned; otherwise
            an element of ``arg.action_space`` is drawn with
            ``np.random.choice``.

            Args:
                x: Raw observation; converted with ``process_x`` before use.

            Returns:
                The chosen action: a NumPy integer index on the greedy branch,
                or an element of ``arg.action_space`` on the random branch.
            """
            self.eval_net.eval()

            # The original called ``x.to(device)`` here and discarded the result
            # (``Tensor.to`` is not in-place), so the network always received
            # the un-moved tensor. That behaviour is kept; only the wasted copy
            # is dropped.
            x = process_x(x)

            # input only one sample
            if np.random.uniform() < EPSILON:   # greedy
                # No gradients are needed to pick an action.
                with torch.no_grad():
                    actions_value = self.eval_net(x)
                # ``.cpu()`` is a no-op for CPU tensors, so one code path works
                # for every device. The original only assigned ``action`` when
                # ``device`` was exactly 'cuda' or 'cpu' and left it unbound
                # otherwise (e.g. 'cuda:0').
                action = torch.max(actions_value, 1)[1].cpu().numpy()[0]
            else:   # random
                action = np.random.choice(arg.action_space)
            return action
Ejemplo n.º 4
0
        def forward(self, x, info=None):
            """Map an input batch (plus optional extra info) to ``self.head`` outputs.

            Args:
                x: Input batch. Anything that is not a ``torch.FloatTensor``
                    is first converted with ``process_x``; the result is then
                    moved to ``device``.
                info: Optional tensor, or tuple of tensors (concatenated
                    first), that is concatenated to the flattened conv
                    features before ``self.fc``. ``None`` disables this step.

            Returns:
                The output of ``self.head`` (no softmax is applied).
            """
            # NOTE(review): a CUDA tensor is not a ``torch.FloatTensor`` instance,
            # so it is also routed through process_x -- confirm this is intended.
            if not isinstance(x, torch.FloatTensor):
                x = process_x(x)
            x = x.to(device)

            x = F.relu(self.bn1(self.conv1(x)))
            x = F.relu(self.bn2(self.conv2(x)))
            x = F.relu(self.bn3(self.conv3(x)))
            # Flatten everything except the leading (batch) dimension.
            x = x.reshape(x.size(0), -1)

            # Identity check: ``!=`` would dispatch to the operand's own
            # ``__ne__``, which can be elementwise for array-like values.
            if info is not None:
                if isinstance(info, tuple):
                    info = torch.cat(info)
                # NOTE(review): torch.cat defaults to dim=0, so ``info`` is
                # appended as extra rows rather than extra feature columns --
                # confirm this matches the input size expected by self.fc.
                x = torch.cat((x, info))

            x = F.relu(self.fc(x))
            x = self.head(x)

            return x
Ejemplo n.º 5
0
                # if (arg.show_pre_image and cv_img(s_[-1])):    break_flag = 1; break
                # tt.sleep(0.5)
                break
            s = s_

    # Close any OpenCV windows left open by the run.
    cv2.destroyAllWindows()
    # tt.now() divided by the step count, rounded to 3 decimals.
    # Presumably elapsed seconds per step -- `tt` is defined outside this view; TODO confirm.
    # NOTE(review): raises ZeroDivisionError when steps == 0.
    f = round(tt.now() / steps, 3)
    # Reciprocal of f. NOTE(review): raises ZeroDivisionError when f rounds to 0.0.
    step_per_second = round(1 / f, 3)
    print('##############################################################')
    print(i_episode, ' episodes--steps: ', steps, 'cost_time: ', tt.now(),
          'frequence: ', f, '-- step_per_second: ', step_per_second)
    # NOTE(review): the bare `1` and `s_` expressions below have no effect;
    # they look like leftover interactive/debug residue.
    1

    s_

    # Convert the last next-state with process_x and run it through both
    # networks; the outputs are discarded (looks like a manual sanity check).
    s_ = process_x(s_)
    # model.eval_net.train()
    model.eval_net(s_)
    model.target_net(s_)
    # torch.manual_seed(1)
    # model.target_net.load_state_dict(model.eval_net.state_dict())

    # ---------------  plot
    import matplotlib.pyplot as plt

    # Plot settings and the reward history read from the model; how they are
    # consumed is outside this view.
    PLOT = True
    Plot_mean_reward = 0
    Rewards = model.Rewards
    # def plot_mean_loss(ys):
    #     cut_len = 10
    #     # y = np.zeros(len(ys))