Example #1
    def run(self):
        tracker.set_queue('train.loss', is_print=True)
        tracker.set_scalar('valid.loss', is_print=True)
        for s in self.training_loop:
            self.train()
            if self.training_loop.is_interval(self.valid_interval):
                self.validate()
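Example #1 only declares indicator types; values are fed elsewhere with `tracker.add` and flushed with `tracker.save`. A minimal self-contained sketch of that pairing, assuming labml's `tracker`/`experiment` API as used throughout these examples (the loss values are made up):

from labml import tracker, experiment

# Declare indicator types up front: a queue keeps a sliding window
# (the last 20 values here) and prints its mean; a scalar is logged as-is.
tracker.set_queue('train.loss', queue_size=20, is_print=True)
tracker.set_scalar('valid.loss', is_print=True)

experiment.create(name='tracker_sketch')
with experiment.start():
    for step in range(100):
        # `tracker.add` only collects values; nothing is written yet
        tracker.add('train.loss', 1.0 / (step + 1))
        tracker.add('valid.loss', 1.5 / (step + 1))
        # `tracker.save` flushes the collected values at this global step
        tracker.save(step)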
Example #2
    def run_training_loop(self):
        """
        ### Run training loop
        """

        # Track reward and length over the last 100 episodes
        tracker.set_queue('reward', 100, True)
        tracker.set_queue('length', 100, True)

        for update in monit.loop(self.updates):
            progress = update / self.updates

            # Linearly decrease `learning_rate` and the `clip_range` $\epsilon$
            learning_rate = 2.5e-4 * (1 - progress)
            clip_range = 0.1 * (1 - progress)

            # sample with current policy
            samples = self.sample()

            # train the model
            self.train(samples, learning_rate, clip_range)

            # write summary info to the writer, and log to the screen
            tracker.save()
            if (update + 1) % 1_000 == 0:
                logger.log()
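In Example #2, `tracker.save()` takes no step argument; labml's `monit.loop` is understood to advance the tracker's global step on each iteration (an assumption about the library's behavior, consistent with how these examples use it). A stripped-down sketch of the same skeleton with a dummy metric:

from labml import monit, tracker, experiment, logger

tracker.set_queue('reward', 100, True)

experiment.create(name='loop_sketch')
with experiment.start():
    for update in monit.loop(10_000):
        # ... sample and train here ...
        tracker.add('reward', float(update % 7))  # dummy value
        tracker.save()
        # Start a fresh output line periodically
        if (update + 1) % 1_000 == 0:
            logger.log()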
Example #3
    def __init__(self, *, model: nn.Module,
                 optimizer: Optional[torch.optim.Adam], loss_func: Callable,
                 accuracy_func: Callable):
        self.accuracy_func = accuracy_func
        self.loss_func = loss_func
        self.optimizer = optimizer
        self.model = model

        tracker.set_queue("*.loss", 20, True)
        tracker.set_scalar("*.accuracy", True)
Example #4
    def run(self):
        pytorch_utils.add_model_indicators(self.model)

        tracker.set_queue("train.loss", 20, True)
        tracker.set_histogram("valid.loss", True)
        tracker.set_scalar("valid.accuracy", True)

        for _ in self.training_loop:
            self.train()
            self.test()
            if self.is_log_parameters:
                pytorch_utils.store_model_indicators(self.model)
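`add_model_indicators`/`store_model_indicators` log parameter and gradient statistics for the model. Where those helpers are unavailable, roughly the same effect can be had with plain `tracker` calls; a manual sketch (the indicator names and the histogram choice are my own, not the helpers' actual scheme):

import torch.nn as nn
from labml import tracker

# Declare histogram types for these families (wildcards as in Example #3)
tracker.set_histogram('param.*')
tracker.set_histogram('grad.*')

def store_model_indicators(model: nn.Module):
    # Log every trainable parameter (and its gradient) as a histogram
    for name, param in model.named_parameters():
        if param.requires_grad:
            tracker.add(f'param.{name}', param.detach().cpu())
            if param.grad is not None:
                tracker.add(f'grad.{name}', param.grad.detach().cpu())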
Example #5
    def run(self):
        # Training and testing
        pytorch_utils.add_model_indicators(self.model)

        tracker.set_queue("train.loss", 20, True)
        tracker.set_histogram("valid.loss", True)
        tracker.set_scalar("valid.accuracy", True)
        tracker.set_indexed_scalar('valid.sample_loss')
        tracker.set_indexed_scalar('valid.sample_pred')

        test_data = np.array([d[0].numpy() for d in self.valid_dataset])
        experiment.save_numpy("valid.data", test_data)

        for _ in self.training_loop:
            self.train()
            self.valid()
            if self.is_log_parameters:
                pytorch_utils.store_model_indicators(self.model)
Example #6
    def run_training_loop(self):
        """
        ### Run training loop
        """

        # Track reward and length over the last 100 episodes
        tracker.set_queue('reward', 100, True)
        tracker.set_queue('length', 100, True)

        for update in monit.loop(self.updates):
            # sample with current policy
            samples = self.sample()

            # train the model
            self.train(samples)

            # Save tracked indicators.
            tracker.save()
            # Add a new line to the screen periodically
            if (update + 1) % 1_000 == 0:
                logger.log()
Example #7
    def __init__(self, *, name: str, model: nn.Module,
                 optimizer: Optional[torch.optim.Adam], loss_func: Callable,
                 accuracy_func: Callable,
                 data_loader: torch.utils.data.DataLoader,
                 is_increment_global_step: bool, log_interval: Optional[int]):
        r"""
        Arguments:
            loss_func(Callable): A module with a call signature
                ``(output: torch.Tensor, target: torch.Tensor) -> torch.Tensor``
            accuracy_func(Callable): A module with a call signature
                ``(output: torch.Tensor, target: torch.Tensor) -> int``
        """
        self.accuracy_func = accuracy_func
        self.loss_func = loss_func
        self.log_interval = log_interval
        self.is_increment_global_step = is_increment_global_step
        self.optimizer = optimizer
        self.data_loader = data_loader
        self.name = name
        self.model = model

        tracker.set_queue(".loss", 20, True)
        tracker.set_scalar(".accuracy", True)
Example #8
    def run_training_loop(self):
        """
        ### Run training loop
        """

        # Track reward and length over the last 100 episodes
        tracker.set_queue('reward', 100, True)
        tracker.set_queue('length', 100, True)

        # Copy to target network initially
        self.target_model.load_state_dict(self.model.state_dict())

        for update in monit.loop(self.updates):
            # $\epsilon$, exploration fraction
            exploration = self.exploration_coefficient(update)
            tracker.add('exploration', exploration)
            # $\beta$ for prioritized replay
            beta = self.prioritized_replay_beta(update)
            tracker.add('beta', beta)

            # Sample with current policy
            self.sample(exploration)

            # Start training after the buffer is full
            if self.replay_buffer.is_full():
                # Train the model
                self.train(beta)

                # Periodically update target network
                if update % self.update_target_model == 0:
                    self.target_model.load_state_dict(self.model.state_dict())

            # Save tracked indicators.
            tracker.save()
            # Add a new line to the screen periodically
            if (update + 1) % 1_000 == 0:
                logger.log()
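The `exploration_coefficient` and `prioritized_replay_beta` schedules are not shown in this excerpt. A plausible sketch, assuming the usual piecewise-linear form (the class and its constants are assumptions, not the author's code):

class LinearSchedule:
    """Linearly interpolate from `start` to `end` over `duration` updates."""

    def __init__(self, start: float, end: float, duration: int):
        self.start, self.end, self.duration = start, end, duration

    def __call__(self, update: int) -> float:
        t = min(1.0, update / self.duration)
        return self.start + t * (self.end - self.start)

# Hypothetical settings: anneal exploration down, anneal beta up to 1
exploration_coefficient = LinearSchedule(start=1.0, end=0.1, duration=50_000)
prioritized_replay_beta = LinearSchedule(start=0.4, end=1.0, duration=100_000)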
Example #9
    def init(self):
        tracker.set_queue("loss.*", 20, True)
        tracker.set_scalar("accuracy.*", True)
        hook_model_outputs(self.mode, self.model, 'model')
        self.state_modules = [self.accuracy_func]
Example #10
File: fixed_cards.py Project: vpj/poker
    def run(self):
        tracker.set_queue('train.loss', is_print=True)
        for s in self.training_loop:
            self.train()
Example #11
def main():
    # set indicator types
    tracker.set_queue("train_loss", 20, True)
    tracker.set_histogram("valid_loss", True)
    tracker.set_scalar("valid_accuracy", True)

    epochs = 10

    train_batch_size = 64
    test_batch_size = 1000

    use_cuda = True
    cuda_device = 0
    seed = 5
    train_log_interval = 10

    learning_rate = 0.01

    # get device
    is_cuda = use_cuda and torch.cuda.is_available()
    if not is_cuda:
        device = torch.device("cpu")
    else:
        if cuda_device < torch.cuda.device_count():
            device = torch.device(f"cuda:{cuda_device}")
        else:
            print(f"Cuda device index {cuda_device} higher than "
                  f"device count {torch.cuda.device_count()}")

            device = torch.device(f"cuda:{torch.cuda.device_count() - 1}")

    # data transform
    data_transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.1307, ), (0.3081, ))])

    # train loader
    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST(str(lab.get_data_path()),
                       train=True,
                       download=True,
                       transform=data_transform),
        batch_size=train_batch_size, shuffle=True)

    # test loader
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST(str(lab.get_data_path()),
                       train=False,
                       download=True,
                       transform=data_transform),
        batch_size=test_batch_size, shuffle=False)

    # model
    model = Net().to(device)

    # optimizer
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # set seeds
    torch.manual_seed(seed)

    # only for logging purposes
    configs = {
        'epochs': epochs,
        'train_batch_size': train_batch_size,
        'test_batch_size': test_batch_size,
        'use_cuda': use_cuda,
        'cuda_device': cuda_device,
        'seed': seed,
        'train_log_interval': train_log_interval,
        'learning_rate': learning_rate,
        'device': device,
        'train_loader': train_loader,
        'test_loader': test_loader,
        'model': model,
        'optimizer': optimizer,
    }

    # create the experiment
    experiment.create(name='tracker')

    # experiment configs
    experiment.calculate_configs(configs)

    # PyTorch model
    experiment.add_pytorch_models(dict(model=model))

    experiment.start()

    # training loop
    for epoch in range(1, epochs + 1):
        train(model, optimizer, train_loader, device, train_log_interval)
        test(model, test_loader, device)
        logger.log()

    # save the model
    experiment.save_checkpoint()
Example #12
    def init(self):
        tracker.set_queue("loss.*", 20, True)
        tracker.set_scalar("accuracy.*", True)
        self.state_modules = [self.accuracy_func]
Example #13
    def startup(self):
        pytorch_utils.add_model_indicators(self.model)

        tracker.set_queue("train.loss", 20, True)
        tracker.set_histogram("valid.loss", True)
        tracker.set_scalar("valid.accuracy", True)
Example #14
def main():
    # ✨ Set the types of the stats/indicators.
    # They default to scalars if not specified
    tracker.set_queue('loss.train', 20, True)
    tracker.set_histogram('loss.valid', True)
    tracker.set_scalar('accuracy.valid', True)

    # Configurations
    configs = {
        'epochs': 10,
        'train_batch_size': 64,
        'valid_batch_size': 100,
        'use_cuda': True,
        'seed': 5,
        'train_log_interval': 10,
        'learning_rate': 0.01,
    }

    is_cuda = configs['use_cuda'] and torch.cuda.is_available()
    if not is_cuda:
        device = torch.device("cpu")
    else:
        device = torch.device(f"cuda:0")

    data_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])

    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST(str(lab.get_data_path()),
                       train=True,
                       download=True,
                       transform=data_transform),
        batch_size=configs['train_batch_size'], shuffle=True)

    valid_loader = torch.utils.data.DataLoader(
        datasets.MNIST(str(lab.get_data_path()),
                       train=False,
                       download=True,
                       transform=data_transform),
        batch_size=configs['valid_batch_size'], shuffle=False)

    model = Net().to(device)
    optimizer = optim.Adam(model.parameters(), lr=configs['learning_rate'])

    torch.manual_seed(configs['seed'])

    # ✨ Create the experiment
    experiment.create(name='mnist_labml_tracker')

    # ✨ Save configurations
    experiment.configs(configs)

    # ✨ Set PyTorch models for checkpoint saving and loading
    experiment.add_pytorch_models(dict(model=model))

    # ✨ Start and monitor the experiment
    with experiment.start():
        for epoch in range(1, configs['epochs'] + 1):
            train(model, optimizer, train_loader, device, configs['train_log_interval'])
            validate(model, valid_loader, device)
            logger.log()

    # ✨ Save the models
    experiment.save_checkpoint()
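The `train` and `validate` helpers are not part of this listing. A minimal sketch of a `train` that feeds the `loss.train` queue declared above (the body is an assumption modeled on a standard MNIST training step, not the original sample):

import torch.nn.functional as F
from labml import tracker

def train(model, optimizer, train_loader, device, train_log_interval):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)

        optimizer.zero_grad()
        output = model(data)
        loss = F.cross_entropy(output, target)
        loss.backward()
        optimizer.step()

        # Advance the global step by the number of samples processed
        tracker.add_global_step(len(data))
        tracker.add('loss.train', loss)
        if batch_idx % train_log_interval == 0:
            tracker.save()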
Example #15
def main_train():
    lstm_size = 1024
    lstm_layers = 3
    batch_size = 32
    seq_len = 32

    with monit.section("Loading data"):
        # Load all python files
        files = parser.load.load_files()
        # Split training and validation data
        train_files, valid_files = parser.load.split_train_valid(
            files, is_shuffle=False)

    with monit.section("Create model"):
        # Create model
        model = SimpleLstmModel(encoding_size=tokenizer.VOCAB_SIZE,
                                embedding_size=tokenizer.VOCAB_SIZE,
                                lstm_size=lstm_size,
                                lstm_layers=lstm_layers)
        # Move model to `device`
        model.to(device)

        # Create loss function and optimizer
        loss_func = torch.nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(model.parameters())

    # Initial state is 0
    h0 = torch.zeros((lstm_layers, batch_size, lstm_size), device=device)
    c0 = torch.zeros((lstm_layers, batch_size, lstm_size), device=device)

    # Setup logger indicators
    tracker.set_queue("train.loss", queue_size=500, is_print=True)
    tracker.set_queue("valid.loss", queue_size=500, is_print=True)

    # Specify the model in [lab](https://github.com/vpj/lab) for saving and loading
    experiment.add_pytorch_models({'base': model})

    # Start training from scratch (global step 0)
    experiment.start()

    # Number of batches per epoch
    batches = math.ceil(
        sum([len(f[1]) + 1 for f in train_files]) / (batch_size * seq_len))

    # Number of steps per epoch. We train and validate on each step.
    steps_per_epoch = 200

    # Train for 100 epochs
    for epoch in monit.loop(range(100)):
        # Create trainer
        trainer = Trainer(files=train_files,
                          model=model,
                          loss_func=loss_func,
                          optimizer=optimizer,
                          batch_size=batch_size,
                          seq_len=seq_len,
                          is_train=True,
                          h0=h0,
                          c0=c0,
                          eof=0)
        # Create validator
        validator = Trainer(files=valid_files,
                            model=model,
                            loss_func=loss_func,
                            optimizer=optimizer,
                            is_train=False,
                            seq_len=seq_len,
                            batch_size=batch_size,
                            h0=h0,
                            c0=c0,
                            eof=0)

        # Next batches to train and validate
        train_batch = 0
        valid_batch = 0

        # Loop through steps
        for i in range(1, steps_per_epoch):
            try:
                with DelayedKeyboardInterrupt():
                    # Set global step
                    global_step = epoch * batches + min(
                        batches, (batches * i) // steps_per_epoch)
                    tracker.set_global_step(global_step)

                    # Last batch to train and validate
                    train_batch_limit = trainer.x.shape[0] * min(
                        1., (i + 1) / steps_per_epoch)
                    valid_batch_limit = validator.x.shape[0] * min(
                        1., (i + 1) / steps_per_epoch)

                    with monit.section("train",
                                       total_steps=trainer.x.shape[0],
                                       is_partial=True):
                        model.train()
                        # Train
                        while train_batch < train_batch_limit:
                            trainer.run(train_batch)
                            monit.progress(train_batch + 1)
                            train_batch += 1

                    with monit.section("valid",
                                       total_steps=validator.x.shape[0],
                                       is_partial=True):
                        model.eval()
                        # Validate
                        while valid_batch < valid_batch_limit:
                            validator.run(valid_batch)
                            monit.progress(valid_batch + 1)
                            valid_batch += 1

                    # Output results
                    tracker.save()

                    # 10 lines of logs per epoch
                    if (i + 1) % (steps_per_epoch // 10) == 0:
                        logger.log()
            except KeyboardInterrupt:
                experiment.save_checkpoint()
                return

        experiment.save_checkpoint()
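`DelayedKeyboardInterrupt` is not defined in this excerpt. It is usually a context manager that defers `SIGINT` until the block exits, so Ctrl-C never lands mid-step and the checkpoint in the `except` branch stays consistent. A typical implementation sketch (this exact code is an assumption, not the project's):

import signal

class DelayedKeyboardInterrupt:
    """Defer SIGINT until the `with` block exits."""

    def __enter__(self):
        self.signal_received = None
        self.old_handler = signal.signal(signal.SIGINT, self._handler)

    def _handler(self, sig, frame):
        # Remember the interrupt instead of raising immediately
        self.signal_received = (sig, frame)

    def __exit__(self, exc_type, exc_value, traceback):
        signal.signal(signal.SIGINT, self.old_handler)
        if self.signal_received is not None:
            # Re-raise KeyboardInterrupt now that the critical section is done
            self.old_handler(*self.signal_received)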