Code Example #1
File: train_qmix.py Project: cinjon/playground
    def compute_q_loss(global_state, state, reward, next_global_state,
                       next_state):
        if args.cuda:
            # move all inputs to the GPU when CUDA is enabled
            global_state = global_state.cuda()
            state = state.cuda()
            reward = reward.cuda()
            next_global_state = next_global_state.cuda()
            next_state = next_state.cuda()

        # wrap inputs in autograd Variables (pre-0.4 PyTorch API);
        # volatile=True marks the next-state inputs as inference-only
        global_state = Variable(global_state, requires_grad=True)
        state = Variable(state, requires_grad=True)
        next_global_state = Variable(next_global_state, volatile=True)
        next_state = Variable(next_state, volatile=True)

        # Q-values from the online network and the bootstrapped maximum
        # Q-value from the target network
        current_q_values, _ = training_agents[0].act(global_state, state)
        max_next_q_values, _ = training_agents[0].target_act(
            next_global_state, next_state)
        max_next_q_values = max_next_q_values.max(1)[0]
        # average the rewards across the individual agents
        expected_q_values = Variable(
            reward.mean(dim=1)) + args.gamma * max_next_q_values

        loss = MSELoss()(current_q_values, expected_q_values)
        loss.backward()

        return loss.cpu().data[0]  # scalar loss value (pre-0.4 indexing)
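The snippet above relies on the pre-0.4 Variable/volatile API. Below is a minimal sketch of the same TD-target / MSELoss computation with the current PyTorch API (torch.no_grad() in place of volatile Variables, loss.item() in place of loss.data[0]). The agent with act/target_act methods and the gamma discount follow the example above; the standalone function signature and device argument are illustrative assumptions.

import torch
from torch.nn import MSELoss


def compute_q_loss_sketch(agent, global_state, state, reward,
                          next_global_state, next_state, gamma,
                          device="cpu"):
    global_state = global_state.to(device)
    state = state.to(device)
    reward = reward.to(device)
    next_global_state = next_global_state.to(device)
    next_state = next_state.to(device)

    # Q-values from the online network
    current_q_values, _ = agent.act(global_state, state)

    # TD target: no gradients flow through the target network
    with torch.no_grad():
        max_next_q_values, _ = agent.target_act(next_global_state, next_state)
        max_next_q_values = max_next_q_values.max(1)[0]
        expected_q_values = reward.mean(dim=1) + gamma * max_next_q_values

    loss = MSELoss()(current_q_values, expected_q_values)
    loss.backward()
    return loss.item()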
Code Example #2
    def fit(self, train_x, train_y, val_x, val_y, bin_size, lr, batch_size,
            with_gap, earlystop, verbose):
        self.batch_size = batch_size
        optimizer = torch.optim.Adam(self.model.parameters(),
                                     lr=lr,
                                     amsgrad=True)
        loss_list = []
        for i in range(self.maxepochs):
            optimizer.zero_grad()
            # draw a mini-batch and move it to the target device
            x, y = self.loader.get_batch()
            x = x.to(self.device)
            y = y.to(self.device)

            ypred = self.model(x)

            # drop a trailing singleton dimension so predictions and targets
            # have the same shape
            if ypred.dim() == 2:
                ypred = ypred.squeeze(1)
            assert ypred.size() == y.size()
            loss = MSELoss(reduction='mean')(ypred, y)
            loss.backward()
            optimizer.step()
            if earlystop:
                # track validation loss for the early-stopping check below
                loss_list.append(self.evaluate(val_x, val_y))
            elif train_x.size(0) == batch_size:
                # track training loss when a full batch was drawn
                loss_list.append(loss.cpu().data.numpy())

            # stop once the relative change between the last two losses
            # falls below 0.01%
            if len(loss_list) > 5 and abs(loss_list[-2] / loss_list[-1] - 1) < 0.0001:
                break
        if earlystop:
            return None, loss_list[-1]
        else:
            return loss_list[-1], self.evaluate(val_x, val_y)
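For comparison, here is a self-contained sketch of the same training pattern (Adam with amsgrad, MSELoss(reduction='mean'), and the relative-change stopping test used in fit()) run on synthetic data. The linear model, random data, learning rate, and epoch budget are illustrative assumptions rather than part of the original project.

import torch
from torch.nn import Linear, MSELoss

torch.manual_seed(0)
x = torch.randn(256, 4)                          # synthetic features (assumed)
y = x @ torch.randn(4) + 0.1 * torch.randn(256)  # synthetic targets (assumed)

model = Linear(4, 1)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-2, amsgrad=True)
criterion = MSELoss(reduction='mean')

loss_list = []
for epoch in range(1000):
    optimizer.zero_grad()
    ypred = model(x).squeeze(1)   # match the target shape, as in fit()
    loss = criterion(ypred, y)
    loss.backward()
    optimizer.step()

    loss_list.append(loss.item())
    # stop once the relative change between the last two losses is tiny
    if len(loss_list) > 5 and abs(loss_list[-2] / loss_list[-1] - 1) < 1e-4:
        break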