# Shared imports for the example snippets below. LinearModel, device,
# CHECKPOINT_DIR, ReplayBuffer, and the upper-case hyperparameter constants
# are assumed to be defined elsewhere in each snippet's original module.
import os
import pickle
import random
from collections import defaultdict

import numpy as np
import tensorboardX
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as data
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import Dataset


def trainLinear(maxiter=20, wp=(6 * 5, 6 * 1), lam=0.0, printOut=True):
    class SCADADataset(Dataset):
        # Dataset of (history window, target) pairs loaded from a pickle file
        def __init__(self, name):
            filename = 'dataGW' + name
            with open(filename, 'rb') as f:
                self.dataH, self.dataT = pickle.load(f)

        def __len__(self):
            return self.dataH.shape[0]

        def __getitem__(self, idx):
            x = np.copy(self.dataH[idx, :])
            x = torch.from_numpy(x).float()

            y = np.copy(self.dataT[idx])
            y = torch.from_numpy(y).float()
            return x, y

    windL, predL = wp
    inputD, outD = windL, predL
    batch_size = 512

    lr = 2e-4
    weight_decay = 0.0

    print(' lr: ', lr, ' weight_decay: ', weight_decay, ' windL: ', windL,
          ' predL: ', predL, ' batch_size: ', batch_size, ' inputD: ', inputD,
          ' outD: ', outD, ' lam:', lam)
    epochs = maxiter
    start_epoch = 0
    loadModel = False

    outf = r'C:\YANG Luoxiao\Model\WindSpeed'

    model = LinearModel(inputD, outD).to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr,
                           weight_decay=weight_decay)
    scheduler = ReduceLROnPlateau(optimizer, 'min', patience=20, verbose=True)
    minloss = float('inf')  # best validation loss seen so far

    # if loadModel:
    #     checkpoint = torch.load('%s/%s%d.pth' % (outf, "LSTMMutiTS4Best", num))
    #     model.load_state_dict(checkpoint['model'])
    #     optimizer.load_state_dict(checkpoint['optimizer'])
    #     start_epoch = num
    scadaTrainDataset = SCADADataset(name='Train')
    dataloader = torch.utils.data.DataLoader(scadaTrainDataset,
                                             batch_size=batch_size,
                                             shuffle=True,
                                             num_workers=0)
    scadaValDataset = SCADADataset(name='Val')
    dataloaderVAl = torch.utils.data.DataLoader(scadaValDataset,
                                                batch_size=5000,
                                                shuffle=False,
                                                num_workers=0)
    # History arrays start with a placeholder row that is stripped on return
    lossTrain = np.zeros((1, 1))
    lossVal = np.zeros((1, 1))
    acc = np.zeros((1, 1))
    for epoch in range(start_epoch, start_epoch + epochs):
        model.train()

        for i, (x, y) in enumerate(dataloader):
            optimizer.zero_grad()
            x = x.to(device).view(x.shape[0], -1)
            y = y.to(device=device, dtype=torch.int64).view(-1)
            ypred = model(x)
            loss = F.cross_entropy(ypred, y)
            loss.backward()
            lossTrain = np.vstack(
                (lossTrain, loss.detach().cpu().numpy().reshape((-1, 1))))
            optimizer.step()

            # Evaluate on the full validation set after every training step so
            # that lossVal and acc are recorded per iteration (simple but slow)
            model.eval()
            c = 0
            valLoss = 0
            accuracy = 0
            with torch.no_grad():
                for xv, yv in dataloaderVAl:
                    c += 1
                    xv = xv.to(device).view(xv.shape[0], -1)
                    yv = yv.to(device=device, dtype=torch.int64).view(-1)
                    ypredv = model(xv)
                    valLoss += F.cross_entropy(ypredv, yv)
                    predict = torch.argmax(ypredv, dim=1)
                    accuracy += torch.sum(predict == yv)
                accuracy = accuracy.cpu().numpy() / len(scadaValDataset)
                lossVal = np.vstack(
                    (lossVal, (valLoss / c).cpu().numpy().reshape((-1, 1))))
                acc = np.vstack((acc, accuracy.reshape((-1, 1))))
            model.train()

            if printOut and i % 2000 == 0:
                print('[%d/%d][%d/%d]\tLoss: %.4f\tVal loss: %.4f' %
                      (epoch, start_epoch + epochs, i, len(dataloader),
                       loss.item(), (valLoss / c).item()))

        # Epoch-end validation pass
        model.eval()
        c = 0
        valLoss = 0
        accuracy = 0
        with torch.no_grad():
            for x, y in dataloaderVAl:
                c += 1
                x = x.to(device).view(x.shape[0], -1)
                y = y.to(device=device, dtype=torch.int64).view(-1)
                ypred = model(x)
                valLoss += F.cross_entropy(ypred, y)
                predict = torch.argmax(ypred, dim=1)
                accuracy += torch.sum(predict == y)
            accuracy = accuracy.cpu().numpy() / len(scadaValDataset)
            avgValLoss = (valLoss / c).item()
            if printOut:
                print('VAL loss= ', avgValLoss, '  VAL accuracy ', accuracy)

        scheduler.step(avgValLoss)
        if minloss > avgValLoss:
            state = {
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'epoch': epoch
            }
            # Checkpoint name encodes predL / 6 (the horizon in hours,
            # assuming 10-minute samples)
            if lam == 0:
                torch.save(state,
                           '%s/GWRLLinearWT%d.pth' % (outf, int(predL / 6)))
            else:
                torch.save(state,
                           '%s/RLlassoWT%d.pth' % (outf, int(predL / 6)))
            minloss = avgValLoss
            if printOut:
                print('best accuracy:  ', accuracy)
    return lossTrain[1:, :], lossVal[1:, :], acc[1:, :]
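

# Hypothetical usage sketch for trainLinear. It assumes the pickled
# 'dataGWTrain'/'dataGWVal' files and the module-level LinearModel and device
# exist as above; the argument values are illustrative only (the factor of 6
# suggests 10-minute samples, i.e. a 5 h window predicting 1 h ahead).
def demo_trainLinear():
    lossTrain, lossVal, acc = trainLinear(maxiter=20, wp=(6 * 5, 6 * 1),
                                          lam=0.0, printOut=True)
    print('final val accuracy: %.4f' % acc[-1, 0])
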
class Trainer:
    def __init__(self,
                 checkpoint_name,
                 num_classes,
                 num_input_features,
                 max_epochs=100,
                 lr=1e-2,
                 weight_decay=5e-2):
        self.checkpoint_name = checkpoint_name
        self.checkpoint_dir = os.path.join(CHECKPOINT_DIR,
                                           self.checkpoint_name)
        if not os.path.exists(self.checkpoint_dir):
            os.makedirs(self.checkpoint_dir)

        self.max_epochs = max_epochs
        self.epoch = 0
        self.lr = lr
        self.weight_decay = weight_decay

        self.num_classes = num_classes
        self.num_input_features = num_input_features
        self.model = LinearModel(num_classes=self.num_classes,
                                 num_input_features=self.num_input_features)
        self.loss_fn = nn.CrossEntropyLoss()
        self.optimizer = optim.Adam(self.model.parameters(),
                                    lr=self.lr,
                                    amsgrad=True,
                                    weight_decay=weight_decay)

        # self.log_dir = os.path.join(LOG_DIR, self.checkpoint_name)
        self.log_writer = tensorboardX.SummaryWriter('logs/' +
                                                     self.checkpoint_name)
        self.load_checkpoint('model.pt')

    def train(self, train_dataset, test_dataset, batch_size, eval_batch_size):
        train_data_loader = data.DataLoader(train_dataset,
                                            batch_size=batch_size,
                                            shuffle=True,
                                            drop_last=True)
        test_data_loader = data.DataLoader(test_dataset,
                                           batch_size=eval_batch_size,
                                           shuffle=False,
                                           drop_last=False)
        start_epoch = self.epoch
        print("Training...\n")
        for _ in range(start_epoch, self.max_epochs):
            self.epoch += 1
            self.train_step(train_data_loader)
            train_acc = self.evaluate_step(train_data_loader)
            test_acc = self.evaluate_step(test_data_loader)
            self.log_writer.add_scalars('accuracy', {
                'test': test_acc,
                'train': train_acc
            }, self.epoch)
            if self.epoch % 20 == 0:
                print("===== Epoch {} =====".format(self.epoch))
                print('test acc: {0}\ttrain acc: {1}'.format(
                    test_acc, train_acc))

    def train_step(self, train_data_loader):
        self.model.train()
        epoch_losses = list()
        for (features, labels) in train_data_loader:
            self.optimizer.zero_grad()
            predictions = self.model(features)
            loss = self.loss_fn(predictions, labels.long().view(-1))
            loss.backward()
            self.optimizer.step()
            epoch_losses.append(loss.item())
        # Checkpoint once per epoch rather than after every batch
        self.save_checkpoint('model.pt')
        self.log_writer.add_scalar('loss', np.mean(epoch_losses), self.epoch)

    def evaluate_step(self, test_data_loader):
        self.model.eval()
        num_correct = 0
        num_data = 0
        with torch.no_grad():
            for (features, labels) in test_data_loader:
                logits = self.model(features)
                # take the argmax over the class dimension of the N x C logits
                predictions = torch.argmax(logits, dim=1)
                is_correct = torch.eq(predictions.view(-1),
                                      labels.long().view(-1)).int()
                num_correct += torch.sum(is_correct).item()
                num_data += labels.size(0)
        accuracy = num_correct / num_data
        return accuracy

    def save_checkpoint(self, filename):
        checkpoint_filepath = os.path.join(self.checkpoint_dir, filename)
        torch.save(
            {
                'epoch': self.epoch,
                'model': self.model.state_dict(),
                'optim': self.optimizer.state_dict()
            }, checkpoint_filepath)

    def load_checkpoint(self, filename):
        checkpoint_filepath = os.path.join(self.checkpoint_dir, filename)
        if os.path.exists(checkpoint_filepath):
            checkpoint = torch.load(checkpoint_filepath)
            self.epoch = checkpoint['epoch']
            self.model.load_state_dict(checkpoint['model'])
            self.optimizer.load_state_dict(checkpoint['optim'])
            print("Loaded checkpoint")
        else:
            print("Checkpoint not found, continuing...")
class DQNAgent:
    """Implementation of DQN"""

    def __init__(self, state_size, action_size, seed, opts={}):
        """Initialise the agent

        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            seed (int): Random seed
            opts (dict): optional settings
                BUFFER_SIZE (long): Max size for replay buffer
                BATCH_SIZE (int): Sample size of experiences from replay buffer
                GAMMA (float): discount factor
                TAU (float): soft update of target parameters
                LR (float): optimizer learning rate
                UPDATE_EVERY (int): how often to update the network
        """
        self.state_size = state_size
        self.action_size = action_size
        random.seed(seed)

        self.local_network = LinearModel(state_size, action_size, seed).to(device)
        self.fixed_network = LinearModel(state_size, action_size, seed).to(device)

        # Overwrite the default hyperparameters with any provided options
        self.buffer_size = opts.get('BUFFER_SIZE', BUFFER_SIZE)
        self.batch_size = opts.get('BATCH_SIZE', BATCH_SIZE)
        self.gamma = opts.get('GAMMA', GAMMA)
        self.tau = opts.get('TAU', TAU)
        self.lr = opts.get('LR', LR)
        self.update_every = opts.get('UPDATE_EVERY', UPDATE_EVERY)
        self.optim = optim.Adam(self.local_network.parameters(), lr=self.lr)

        # Initialize replay buffer
        self.memory = ReplayBuffer(self.buffer_size, self.batch_size, seed)
        # Initialize time step (for updating every UPDATE_EVERY steps)
        self.t_step = 0
        self.history = defaultdict(list)

    def act(self, state, eps):
        """Returns the action for specified state

        Params
        ======
            state (array_like): environment state
            eps (float): epsilon, for the epsilon-greedy policy
        """
        if random.random() < eps:
            return random.choice(range(self.action_size))
        else:
            # convert the state to tensor
            state = torch.from_numpy(state).float().unsqueeze(0).to(device)
            # change network into evaluation mode with no gradients
            self.local_network.eval()
            with torch.no_grad():
                action_values = self.local_network(state)
            # change network back to training mode
            self.local_network.train()
            return np.argmax(action_values.cpu().data.numpy())


    def step(self, state, action, reward, next_state, done):
        """Collects experience and learns from experience

        Params
        ======
            state (array_like): environment state (S)
            action (int): action taken on state (A)
            reward (float): reward (R) received by taking action A in state S
            next_state (array_like): environment state (S') received after taking action A in state S
            done (boolean): whether the episode ended after taking action A in state S
        """
        self.memory.add(state, action, reward, next_state, done)
        self.t_step += 1

        if self.t_step % self.update_every == 0:
            if len(self.memory) > self.batch_size:
                experiences = self.memory.sample()
                self.learn(experiences, self.gamma)
        

    def learn(self, experiences, gamma):
        """Use experience to learn from it

        Params
        ======
            experiences (Tuple[torch.Tensor]): tuple of (s, a, r, s', done) tuples
            gamma (float): discount factor
        """
        states, actions, rewards, next_states, dones = experiences
        # Max Q-value for each next state from the fixed target network
        Q_next = self.fixed_network(next_states).detach().max(1)[0].unsqueeze(1)
        # TD target: reward plus discounted next-state value, zeroed when done
        Q_target = rewards + (gamma * Q_next * (1 - dones))
        # Q-values of the actions actually taken, from the local network
        Q_estimates = self.local_network(states).gather(1, actions)

        loss = F.mse_loss(Q_estimates, Q_target)
        self.history['loss'].append(loss.item())

        self.optim.zero_grad()
        loss.backward()
        self.optim.step()

        # Update fixed network
        self.update_fixed_network(self.local_network, self.fixed_network, self.tau)
        

    def update_fixed_network(self, local_model, target_model, tau):
        """Soft-updates the fixed target network weights:
        target = tau * target_model weights + (1 - tau) * local_model weights

        Note that tau here weights the *target* parameters, which is the
        inverse of the common convention (tau * local + (1 - tau) * target),
        so TAU should be close to 1 for a slowly moving target.

        Params
        ======
            local_model (PyTorch model): source model to copy weights from
            target_model (PyTorch model): target model to copy weights to
            tau (float): interpolation weight applied to the target weights
        """
        for target_weights, local_weights in zip(target_model.parameters(),
                                                 local_model.parameters()):
            target_weights.data.copy_(tau * target_weights +
                                      (1 - tau) * local_weights)

    def save(self, filename):
        """Save local model parameters
        
        Params
        ======
            filename (string): path to save the local network weights to
        """
        torch.save(self.local_network.state_dict(), filename)
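

# Minimal sketch of the ReplayBuffer that DQNAgent assumes, reconstructed from
# how it is used above (add / sample / __len__); illustrative, not the original
# implementation. Shapes follow learn(): actions are int64 column vectors for
# gather(), rewards and dones are float column vectors, and 'device' is the
# same module-level torch device used by the agent.
from collections import deque, namedtuple

Experience = namedtuple('Experience',
                        ['state', 'action', 'reward', 'next_state', 'done'])


class ReplayBuffer:
    """Fixed-size buffer of experience tuples with uniform random sampling."""

    def __init__(self, buffer_size, batch_size, seed):
        self.memory = deque(maxlen=buffer_size)
        self.batch_size = batch_size
        random.seed(seed)

    def add(self, state, action, reward, next_state, done):
        self.memory.append(Experience(state, action, reward, next_state, done))

    def sample(self):
        batch = random.sample(self.memory, self.batch_size)
        states = torch.from_numpy(
            np.vstack([e.state for e in batch])).float().to(device)
        actions = torch.from_numpy(
            np.vstack([e.action for e in batch])).long().to(device)
        rewards = torch.from_numpy(
            np.vstack([e.reward for e in batch])).float().to(device)
        next_states = torch.from_numpy(
            np.vstack([e.next_state for e in batch])).float().to(device)
        dones = torch.from_numpy(np.vstack(
            [e.done for e in batch]).astype(np.uint8)).float().to(device)
        return states, actions, rewards, next_states, dones

    def __len__(self):
        return len(self.memory)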