Example 1
        # excerpt begins inside the batch loop, where the accumulated loss is logged
        losses_per_batch.append(args.gradient_acc_steps * total_loss / 100)
        print('[Epoch: %d, %5d/%d points] total loss per batch: %.7f' %
              (e, (i + 1) * args.batch_size, train_length, losses_per_batch[-1]))
        total_loss = 0.0
losses_per_epoch.append(sum(losses_per_batch) / len(losses_per_batch))
accuracy_per_epoch.append(evaluate_results(net, train_iter, cuda))
print("Losses at Epoch %d: %.7f" % (e, losses_per_epoch[-1]))
print("Accuracy at Epoch %d: %.7f" % (e, accuracy_per_epoch[-1]))
if accuracy_per_epoch[-1] > acc:
    acc = accuracy_per_epoch[-1]
    torch.save({
        'epoch': e + 1,
        'state_dict': net.state_dict(),
        'best_acc': acc,
        'optimizer': optimizer.state_dict(),
        'scheduler': scheduler.state_dict(),
    }, os.path.join("./data/", "test_model_best_%d.pth.tar" % args.model_no))
if (e % 1) == 0:
    save_as_pickle("test_losses_per_epoch_%d.pkl" % args.model_no,
                   losses_per_epoch)
    save_as_pickle("test_accuracy_per_epoch_%d.pkl" % args.model_no,
                   accuracy_per_epoch)
    torch.save({
        'epoch': e + 1,
        'state_dict': net.state_dict(),
        'best_acc': accuracy_per_epoch[-1],
        'optimizer': optimizer.state_dict(),
        'scheduler': scheduler.state_dict(),
        # the original excerpt is truncated here; completed to mirror the
        # best-model save above, with an assumed checkpoint filename
    }, os.path.join("./data/", "test_checkpoint_%d.pth.tar" % args.model_no))
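The "args.gradient_acc_steps * total_loss" rescaling above implies that total_loss accumulates losses that were scaled down for gradient accumulation. Below is a minimal sketch of the surrounding loop under that assumption; net, criterion, optimizer, train_loader, and args come from the excerpt's context and are not defined in it.

# Gradient-accumulation sketch; names are illustrative, taken from the excerpt's context.
total_loss = 0.0
optimizer.zero_grad()
for i, (inputs, labels) in enumerate(train_loader):
    loss = criterion(net(inputs), labels) / args.gradient_acc_steps  # scale so accumulated grads match one large batch
    loss.backward()
    total_loss += loss.item()  # accumulates the scaled loss; logging multiplies it back up
    if (i + 1) % args.gradient_acc_steps == 0:
        optimizer.step()       # one parameter update per accumulation window
        optimizer.zero_grad()
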
Example 2
def main():
    device = torch.device("cpu" if hparams.no_cuda else "cuda")

    print("=== build model ===")
    start = time.time()
    model = Transformer(hparams.d_model, hparams.d_ff, vocab_size,
                        hparams.num_heads, hparams.num_layers, hparams.max_len,
                        hparams.dropout, EOS_id, PAD_id, device).to(device)
    end = time.time()
    print("=== build model done === {} seconds".format(end - start))

    train.global_step = 0

    # train_dataset, val_dataset = split_data(train_path_en, train_path_de, hparams.validation_rate)
    train_dataset = make_dataset(train_path_en, train_path_de)
    val_dataset = make_dataset(val_path_en, val_path_de)

    train_loader = DataLoader(train_dataset,
                              batch_size=hparams.batch_size,
                              collate_fn=custom_collate,
                              shuffle=True,
                              num_workers=hparams.num_workers)
    val_loader = DataLoader(val_dataset,
                            batch_size=hparams.batch_size,
                            collate_fn=custom_collate,
                            num_workers=hparams.num_workers)

    criterion = torch.nn.NLLLoss(ignore_index=PAD_id,
                                 reduction="sum").to(device)
    optimizer = torch.optim.Adam(model.parameters(), hparams.lr)
    writer = SummaryWriter()

    for epoch in range(hparams.max_epochs):
        """train"""
        print("=== train start ===")
        start = time.time()

        loss, bleu_score = train(model, train_loader, criterion, optimizer,
                                 device, writer, epoch, hparams.print_steps)

        end = time.time()
        print("=== train done === {} seconds".format(end - start))
        print("epoch: {}/{}, loss: {}, bleu score: {}".format(
            epoch + 1, hparams.max_epochs, loss, bleu_score))

        torch.save(model.state_dict(), save_path)
        print("model saved to '{}'".format(os.path.abspath(save_path)))

        writer.add_scalar("Loss/train", loss, epoch + 1)
        writer.add_scalar("Bleu score/train", bleu_score, epoch + 1)
        """"""

        print("=== evaluation start ===")
        start = time.time()

        loss, bleu_score = evaluate(model, val_loader, criterion, optimizer,
                                    device, writer)

        end = time.time()
        print("=== evaluation done === {} seconds".format(end - start))
        print("epoch: {}/{}, loss: {}, bleu score: {}".format(
            epoch + 1, hparams.max_epochs, loss, bleu_score))

        writer.add_scalar("Loss/eval", loss, epoch + 1)
        writer.add_scalar("Bleu score/eval", bleu_score, epoch + 1)

Example 3

class ModelDev:
    def __init__(self, config):
        self.config = config
        self.prepare_dataloaders(config['data'])

        # self.model = MLP(config['MLP'])
        # self.model = MLP_3D(config['MLP'])
        # self.model = LSTM(config['LSTM'])
        self.model = Transformer(config['Trans'])
        print(self.model)

        self.model_name = config['train']['model_name']

        self.checkpoint_dir = './checkpoint_dir/{}/'.format(self.model_name)
        os.makedirs(self.checkpoint_dir, exist_ok=True)  # makedirs: the parent dir may not exist yet
        self.tb_log_dir = './tb_log/{}/'.format(self.model_name)
        os.makedirs(self.tb_log_dir, exist_ok=True)

        self.optimal_metric = 100000  # large sentinel; the first validation loss will replace it
        self.cur_metric = 100000

        self.loss = nn.MSELoss()
        self.optim = optim.Adam(self.model.parameters(),
                                lr=self.config['train']['lr'],
                                betas=(0.5, 0.999))

    def prepare_dataloaders(self, config):
        data = Rossler(config)
        train_data = dataset(data.train_X, data.train_Y, data.train_Z,
                             config['w_size'])
        self.train_dataloader = DataLoader(train_data,
                                           batch_size=config['batch_size'],
                                           shuffle=True,
                                           drop_last=True)
        valid_data = dataset(data.valid_X, data.valid_Y, data.valid_Z,
                             config['w_size'])
        self.valid_dataloader = DataLoader(valid_data,
                                           batch_size=config['batch_size'],
                                           shuffle=False,
                                           drop_last=True)
        test_data = dataset(data.test_X, data.test_Y, data.test_Z,
                            config['w_size'])
        self.test_dataloader = DataLoader(test_data,
                                          batch_size=config['batch_size'],
                                          shuffle=False,
                                          drop_last=True)
        self.data = data

    def train(self):
        self.writer = SummaryWriter(self.tb_log_dir)
        for self.epoch in range(self.config['train']['epochs']):
            self.train_on_epoch()
            self.cur_metric = self.valid_on_epoch()
            print(self.cur_metric)
            if self.needToSave():
                self.saveWeights()

    def train_on_epoch(self):
        self.model.train()  # training mode (dropout etc.); train(False) belongs in evaluation
        LOSS = []
        for X_i, Y_i, Z_i, X_o, Y_o, Z_o in self.train_dataloader:
            X_i, Y_i, Z_i, X_o, Y_o, Z_o = self.cast_to_float(
                X_i, Y_i, Z_i, X_o, Y_o, Z_o)
            X_i += torch.normal(0, 0.1, X_i.shape)  # additive Gaussian noise as input regularization
            self.model.zero_grad()
            pred = self.model(X_i)
            loss = self.loss(pred, X_o)
            loss.backward()
            self.optim.step()
            LOSS.append(loss.item())
        self.writer.add_scalar('train Loss', np.mean(LOSS), self.epoch)

    def valid_on_epoch(self):
        self.model.eval()
        LOSS = []
        with torch.no_grad():
            for X_i, Y_i, Z_i, X_o, Y_o, Z_o in self.valid_dataloader:
                X_i, Y_i, Z_i, X_o, Y_o, Z_o = self.cast_to_float(
                    X_i, Y_i, Z_i, X_o, Y_o, Z_o)
                pred = self.model(X_i)
                loss = self.loss(pred, X_o)
                LOSS.append(loss.item())
        self.writer.add_scalar('valid Loss', np.mean(LOSS), self.epoch)
        return np.mean(LOSS)

    def cast_to_float(self, X_i, Y_i, Z_i, X_o, Y_o, Z_o):
        X_i = X_i.float()
        Y_i = Y_i.float()
        Z_i = Z_i.float()
        X_o = X_o.float()
        Y_o = Y_o.float()
        Z_o = Z_o.float()
        return X_i, Y_i, Z_i, X_o, Y_o, Z_o

    def needToSave(self):
        if self.cur_metric < self.optimal_metric:
            self.optimal_metric = self.cur_metric
            return True
        return False

    def saveWeights(self, clean_previous=True):
        if clean_previous:
            files = glob(self.checkpoint_dir + '*.pth')
            for f in files:
                os.remove(f)
        torch.save(self.model.state_dict(),
                   '{}model_{}.pth'.format(self.checkpoint_dir, self.epoch))

    def test_MSE(self):
        self.model.eval()
        self.load_weights()
        LOSS = []
        with torch.no_grad():
            for X_i, Y_i, Z_i, X_o, Y_o, Z_o in self.test_dataloader:
                X_i, Y_i, Z_i, X_o, Y_o, Z_o = self.cast_to_float(
                    X_i, Y_i, Z_i, X_o, Y_o, Z_o)
                pred = self.model(X_i)
                loss = self.loss(pred, X_o)
                LOSS.append(loss.item())
        return np.mean(LOSS)

    def test_a_window(self):
        self.model.eval()
        self.load_weights()
        idx = 0
        with torch.no_grad():
            for X_i, Y_i, Z_i, X_o, Y_o, Z_o in self.test_dataloader:
                X_i, Y_i, Z_i, X_o, Y_o, Z_o = self.cast_to_float(
                    X_i, Y_i, Z_i, X_o, Y_o, Z_o)
                pred = self.model(X_i)
                show_a_test_window(X_i.data.numpy()[0, :],
                                   X_o.data.numpy()[0, :],
                                   pred.data.numpy()[0, :], idx,
                                   self.config['data']['stride'])
                idx += 1

    def test_long_window(self, length):
        self.model.eval()
        self.load_weights()
        for start_idx in [100, 200, 300, 400, 500]:
            X_I = self.data.test_X[start_idx:start_idx + length]
            # list() so that .append() below works even when test_X is an ndarray
            X_pred = list(X_I[:self.config['data']['w_size'] - 1])
            with torch.no_grad():
                while len(X_pred) < len(X_I):
                    nparray = np.array(X_pred[-self.config['data']['w_size'] +
                                              1:])
                    nparray = np.expand_dims(nparray, axis=0)
                    torchTensor = torch.FloatTensor(nparray)
                    pred = self.model(
                        torchTensor).data.squeeze().numpy().tolist()
                    X_pred.append(pred)
            show_long_window(X_I, X_pred, self.config['data']['stride'],
                             self.config['data']['w_size'], start_idx,
                             self.config['train']['model_name'])

    def load_weights(self):
        target_file = glob(self.checkpoint_dir + 'model*.pth')[0]
        print('loading ', target_file)
        weights = torch.load(target_file)
        self.model.load_state_dict(weights)
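
A hypothetical usage sketch for ModelDev: the config keys below are the ones the class actually reads, and all values are illustrative rather than taken from the original project.

config = {
    'data': {'w_size': 50, 'batch_size': 32, 'stride': 1},  # read by Rossler/dataset and the plotting helpers
    'Trans': {},  # this project's Transformer hyperparameters would go here
    'train': {'model_name': 'trans_rossler', 'lr': 1e-3, 'epochs': 100},
}

dev = ModelDev(config)
dev.train()                         # trains, validates, and checkpoints the best model
print('test MSE:', dev.test_MSE())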