Example #1
    def lr_find(self, freeze_until=None, start_lr=1e-7, end_lr=1, num_it=100):
        """Gridsearch the optimal learning rate for the training

        Args:
           freeze_until (str, optional): last layer to freeze
           start_lr (float, optional): initial learning rate
           end_lr (float, optional): final learning rate
           num_it (int, optional): number of iterations to perform
        """

        self.model = freeze_model(self.model.train(), freeze_until)
        # Update param groups & LR
        self._reset_opt(start_lr)
        gamma = (end_lr / start_lr)**(1 / (num_it - 1))
        scheduler = MultiplicativeLR(self.optimizer, lambda step: gamma)

        self.lr_recorder = [start_lr * gamma**idx for idx in range(num_it)]
        self.loss_recorder = []

        for batch_idx, (x, target) in enumerate(self.train_loader):
            x, target = self.to_cuda(x, target)

            # Forward
            batch_loss = self._get_loss(x, target)
            self._backprop_step(batch_loss)
            # Update LR
            scheduler.step()

            # Record
            self.loss_recorder.append(batch_loss.item())
            # Stop after the number of iterations
            if batch_idx + 1 == num_it:
                break
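
The loop above relies on MultiplicativeLR with a constant factor, so the learning rate is swept geometrically from start_lr to end_lr over num_it iterations. Below is a minimal, self-contained sketch of just that schedule; the single dummy parameter and the SGD optimizer are stand-ins for illustration, not part of the trainer above.

import torch
from torch.optim import SGD
from torch.optim.lr_scheduler import MultiplicativeLR

start_lr, end_lr, num_it = 1e-7, 1.0, 100
gamma = (end_lr / start_lr) ** (1 / (num_it - 1))

param = torch.nn.Parameter(torch.zeros(1))       # dummy parameter for the optimizer
optimizer = SGD([param], lr=start_lr)
scheduler = MultiplicativeLR(optimizer, lambda step: gamma)

lrs = []
for _ in range(num_it):
    lrs.append(optimizer.param_groups[0]["lr"])  # LR used at this iteration
    optimizer.step()                             # stand-in for a real training step
    scheduler.step()                             # multiply the LR by gamma

print(lrs[0], lrs[-1])  # ~1e-07 ... ~1.0, i.e. start_lr * gamma**idx
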
Example #2
File: core.py  Project: frgfm/Holocron
    def lr_find(
        self,
        freeze_until: Optional[str] = None,
        start_lr: float = 1e-7,
        end_lr: float = 1,
        norm_weight_decay: Optional[float] = None,
        num_it: int = 100,
    ) -> None:
        """Gridsearch the optimal learning rate for the training

        Args:
           freeze_until (str, optional): last layer to freeze
           start_lr (float, optional): initial learning rate
           end_lr (float, optional): final learning rate
           norm_weight_decay (float, optional): weight decay to apply to normalization parameters
           num_it (int, optional): number of iterations to perform
        """

        if num_it > len(self.train_loader):
            raise ValueError("the value of `num_it` needs to be lower than the number of available batches")

        self.model = freeze_model(self.model.train(), freeze_until)
        # Update param groups & LR
        self._reset_opt(start_lr, norm_weight_decay)
        gamma = (end_lr / start_lr) ** (1 / (num_it - 1))
        scheduler = MultiplicativeLR(self.optimizer, lambda step: gamma)

        self.lr_recorder = [start_lr * gamma ** idx for idx in range(num_it)]
        self.loss_recorder = []

        if self.amp:
            self.scaler = torch.cuda.amp.GradScaler()

        for batch_idx, (x, target) in enumerate(self.train_loader):
            x, target = self.to_cuda(x, target)

            # Forward
            batch_loss = self._get_loss(x, target)
            self._backprop_step(batch_loss)
            # Update LR
            scheduler.step()

            # Record
            if torch.isnan(batch_loss) or torch.isinf(batch_loss):
                if batch_idx == 0:
                    raise ValueError("loss value is NaN or inf.")
                else:
                    break
            self.loss_recorder.append(batch_loss.item())
            # Stop after the number of iterations
            if batch_idx + 1 == num_it:
                break

        self.lr_recorder = self.lr_recorder[:len(self.loss_recorder)]
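
Once lr_recorder and loss_recorder are filled, the usual next step is to plot loss against learning rate and pick a value just below the point where the loss stops decreasing. The helper below is a hedged post-processing sketch, not part of Holocron's API; the skip margins and the steepest-slope rule are common heuristics, nothing more.

import numpy as np

def suggest_lr(lr_recorder, loss_recorder, skip_start=2, skip_end=3):
    # Drop the first/last few points, which are usually noisy or diverging.
    lrs = np.asarray(lr_recorder[skip_start:-skip_end], dtype=float)
    losses = np.asarray(loss_recorder[skip_start:-skip_end], dtype=float)
    # Slope of the loss with respect to log(lr); the most negative slope
    # marks the steepest descent, a common pick for the learning rate.
    slopes = np.gradient(losses, np.log(lrs))
    return float(lrs[np.argmin(slopes)])

# e.g. suggest_lr(trainer.lr_recorder, trainer.loss_recorder)
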
Example #3
def lr_range_test(model, train, test, train_loader, test_loader):
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    #model = Net().to(device)
    optimizer = optim.SGD(model.parameters(), lr=0.0001)
    lmbda = lambda epoch: 1.4
    #scheduler = OneCycleLR(optimizer,max_lr=0.5,total_steps=25)
    scheduler = MultiplicativeLR(optimizer, lr_lambda=lmbda)
    learning_lr_trace = []
    for epoch in range(1, 25):

        print(f'Epoch: {epoch} Learning_Rate {scheduler.get_lr()}')
        learning_lr_trace.append(scheduler.get_lr())
        train_loss, train_acc = train(model, device, train_loader, optimizer,
                                      epoch)
        test_loss, test_acc_l1 = test(model, device, test_loader)
        scheduler.step()

    return learning_lr_trace, train_acc, test_acc_l1
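
One caveat with the loop above: scheduler.get_lr() is meant to be called internally by step(), and recent PyTorch versions warn when it is called directly; for some schedulers it can also return a value that differs from the LR actually in use. A small variation using get_last_lr(), which reports the rate the optimizer will apply, shown here on a dummy optimizer rather than the full training loop:

import torch
from torch.optim import SGD
from torch.optim.lr_scheduler import MultiplicativeLR

param = torch.nn.Parameter(torch.zeros(1))
optimizer = SGD([param], lr=0.0001)
scheduler = MultiplicativeLR(optimizer, lr_lambda=lambda epoch: 1.4)

for epoch in range(1, 4):
    print(f'Epoch: {epoch} Learning_Rate {scheduler.get_last_lr()}')
    optimizer.step()     # stand-in for the real train(...) call
    scheduler.step()     # grow the LR by a factor of 1.4 for the next epoch
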
Example #4
    num_train = int(P_TRAIN * num_cars)
    num_test = num_cars - num_train
    train_data, test_data = random_split(dataset, [num_train, num_test])

    # set up the train and test data loaders
    train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
    test_loader = DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=False)

    # load ResNet-50 with every layer frozen except for layer3-bottleneck5 and beyond,
    # and a new fully-connected network which outputs a 196-dim vector
    device = get_device()
    model = load_resnet50_layer3_bottleneck5(num_car_models)
    model = model.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = Adam(model.parameters(), lr=LEARNING_RATE)
    scheduler = MultiplicativeLR(optimizer, lr_lambda=lambda epoch: LR_DECAY)

    # set up the output logger
    output_dir = '/home/mchobanyan/data/research/transfer/vis/finetune-car-resnet50'
    model_dir = os.path.join(output_dir, 'models')
    create_folder(model_dir)
    logger = TrainingLogger(filepath=os.path.join(output_dir, 'training-log.csv'))

    for epoch in tqdm(range(NUM_EPOCHS)):
        train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, device)
        test_loss, test_acc = test_epoch(model, test_loader, criterion, device)
        scheduler.step()
        logger.add_entry(epoch, train_loss, test_loss, train_acc, test_acc)
        checkpoint(model, os.path.join(model_dir, f'model_epoch{epoch}.pt'))
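
Because lr_lambda here returns the constant LR_DECAY, every scheduler.step() simply multiplies the learning rate by that constant, which is the same schedule ExponentialLR implements directly. A small sketch of the equivalence, with 0.95 standing in for the project's LR_DECAY value:

import torch
from torch.optim import Adam
from torch.optim.lr_scheduler import MultiplicativeLR, ExponentialLR

decay = 0.95  # stand-in for LR_DECAY

p1, p2 = torch.nn.Parameter(torch.zeros(1)), torch.nn.Parameter(torch.zeros(1))
opt1, opt2 = Adam([p1], lr=1e-3), Adam([p2], lr=1e-3)
sched1 = MultiplicativeLR(opt1, lr_lambda=lambda epoch: decay)
sched2 = ExponentialLR(opt2, gamma=decay)

for _ in range(5):
    opt1.step(); opt2.step()      # stand-ins for one epoch of training
    sched1.step(); sched2.step()

# Both end at 1e-3 * 0.95**5, up to floating-point rounding.
print(opt1.param_groups[0]["lr"], opt2.param_groups[0]["lr"])
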
Example #5
def record_lr(
    model: torch.nn.Module,
    train_loader: DataLoader,
    batch_transforms,
    optimizer,
    start_lr: float = 1e-7,
    end_lr: float = 1,
    num_it: int = 100,
    amp: bool = False,
):
    """Gridsearch the optimal learning rate for the training.
    Adapted from https://github.com/frgfm/Holocron/blob/master/holocron/trainer/core.py
    """

    if num_it > len(train_loader):
        raise ValueError(
            "the value of `num_it` needs to be lower than the number of available batches"
        )

    model = model.train()
    # Update param groups & LR
    optimizer.defaults["lr"] = start_lr
    for pgroup in optimizer.param_groups:
        pgroup["lr"] = start_lr

    gamma = (end_lr / start_lr)**(1 / (num_it - 1))
    scheduler = MultiplicativeLR(optimizer, lambda step: gamma)

    lr_recorder = [start_lr * gamma**idx for idx in range(num_it)]
    loss_recorder = []

    if amp:
        scaler = torch.cuda.amp.GradScaler()

    for batch_idx, (images, targets) in enumerate(train_loader):
        if torch.cuda.is_available():
            images = images.cuda()

        images = batch_transforms(images)

        # Forward, Backward & update
        optimizer.zero_grad()
        if amp:
            with torch.cuda.amp.autocast():
                train_loss = model(images, targets)["loss"]
            scaler.scale(train_loss).backward()
            # Gradient clipping
            scaler.unscale_(optimizer)
            torch.nn.utils.clip_grad_norm_(model.parameters(), 5)
            # Update the params
            scaler.step(optimizer)
            scaler.update()
        else:
            train_loss = model(images, targets)["loss"]
            train_loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 5)
            optimizer.step()
        # Update LR
        scheduler.step()

        # Record
        if not torch.isfinite(train_loss):
            if batch_idx == 0:
                raise ValueError("loss value is NaN or inf.")
            else:
                break
        loss_recorder.append(train_loss.item())
        # Stop after the number of iterations
        if batch_idx + 1 == num_it:
            break

    return lr_recorder[:len(loss_recorder)], loss_recorder
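
A usage sketch for record_lr. Everything below is an invented stand-in: a toy model whose forward returns a {"loss": ...} dict as the function expects, random tensors in place of real data, and an identity batch transform.

import torch
from torch.utils.data import DataLoader, TensorDataset

class ToyModel(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.fc = torch.nn.Linear(8, 1)

    def forward(self, images, targets):
        pred = self.fc(images).squeeze(1)
        # record_lr only moves the images to CUDA, so align the targets here.
        return {"loss": torch.nn.functional.mse_loss(pred, targets.to(pred.device))}

images, targets = torch.randn(512, 8), torch.randn(512)
train_loader = DataLoader(TensorDataset(images, targets), batch_size=4)
batch_transforms = lambda x: x   # identity stand-in for the real transform

model = ToyModel()
if torch.cuda.is_available():
    model = model.cuda()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

lrs, losses = record_lr(model, train_loader, batch_transforms, optimizer, num_it=100)
print(lrs[0], lrs[-1], min(losses))  # inspect where the loss starts to diverge
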
Example #6
    def fit(
        self,
        X_train,
        y_train,
        X_validation=None,
        y_validation=None,
        loss_key="opt",
        batch_size=128,
        num_workers=0,
        learning_rate=1e-3,
        learning_rate_lambda=0.995,
        max_epoch=10000,
        early_stopping=100,
        device="cpu",
        verbose=False,
    ):
        """
        Train the model using gradient descent back propagation

        Parameters
        ----------
        X_train : {array-like, sparse matrix} of shape (n_samples, n_features)
            Features matrix used to train the model
        y_train : vector-like of shape (n_samples, 1)
            The target vector used to train the model
        X_validation : {array-like, sparse matrix} of shape (n_samples, n_features)
            Features matrix used for early stopping of the training
        y_validation : vector-like of shape (n_samples, 1)
            The target vector used for early stopping of the training
        loss_key: string (default = 'opt')
            Which field of the loss dictionary to optimize
        batch_size: int (default = 128)
            Batch size
        num_workers: int (default = 0)
            Number of cpus to use
        learning_rate: float (default = 1e-3)
            Gradient descent learning rate
        learning_rate_lambda: float (default = 0.995)
            The rate of decreasing learning_rate
        max_epoch: int (default = 10000)
            The maximum number of optimization epochs
        early_stopping: int (default = 100)
            The number of epochs allowed without improving the best validation loss before stopping
        device : 'cpu' or 'cuda' (default = 'cpu')
            Device used by pytorch for training the model and using the trained model for encoding/decoding
        verbose: True or False (default = False)
            Verbosity
        """
        assert X_train.shape[1] == self.input_dim
        self.to(device)
        train_loader = torch.utils.data.DataLoader(
            TensorDataset(torch.Tensor(X_train), torch.Tensor(y_train)),
            batch_size=batch_size,
            shuffle=True,
            num_workers=num_workers,
        )
        if X_validation is not None:
            validation_loader = torch.utils.data.DataLoader(
                TensorDataset(torch.Tensor(X_validation),
                              torch.Tensor(y_validation)),
                batch_size=batch_size,
                shuffle=True,
                num_workers=num_workers,
            )
        else:
            validation_loader = None

        optimizer = torch.optim.Adam(self.parameters(), lr=learning_rate)
        scheduler = MultiplicativeLR(
            optimizer, lr_lambda=(lambda epoch: learning_rate_lambda))
        best_validation_loss = None
        iter_no_improve = 0
        for epoch in range(max_epoch):
            self.train()
            training_loss = 0
            for data in train_loader:
                Xb = data[0].to(device)
                optimizer.zero_grad()
                output = self(Xb)
                loss = self.loss(output, Xb)[loss_key]
                loss.backward()
                optimizer.step()
                training_loss += loss.detach().cpu().numpy()
            self.eval()
            validation_loss = 0
            if validation_loader:
                with torch.no_grad():
                    for data in validation_loader:
                        Xb = data[0].to(device)
                        output = self(Xb)
                        loss = self.loss(output, Xb)[loss_key]
                        validation_loss += loss.detach().cpu().numpy()
                    if best_validation_loss is None or validation_loss < best_validation_loss:
                        best_validation_loss = validation_loss
                        iter_no_improve = 0
                    else:
                        iter_no_improve += 1
                    if iter_no_improve > early_stopping:
                        if verbose:
                            print(f"Early stopping after {epoch} epochs")
                        break
            scheduler.step()
            if verbose:
                print(
                    f"[{epoch}] training loss={training_loss}, validation loss={validation_loss}"
                )
        return self
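
The scheduler here is stepped once per epoch with a constant learning_rate_lambda, so after n epochs the optimizer runs at learning_rate * learning_rate_lambda**n. A quick stand-alone check of that decay, independent of the model class:

import torch
from torch.optim.lr_scheduler import MultiplicativeLR

learning_rate, learning_rate_lambda, epochs = 1e-3, 0.995, 200

param = torch.nn.Parameter(torch.zeros(1))
optimizer = torch.optim.Adam([param], lr=learning_rate)
scheduler = MultiplicativeLR(optimizer, lr_lambda=lambda epoch: learning_rate_lambda)

for _ in range(epochs):
    optimizer.step()     # stand-in for one training epoch
    scheduler.step()

print(optimizer.param_groups[0]["lr"])  # ~1e-3 * 0.995**200 ≈ 3.67e-4
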
Example #7
                                                   fitness_shaping)

        train_writer.add_scalar('fitness', raw_fitness.mean(), i)
        train_writer.add_scalar('fitness/std', raw_fitness.std(), i)
        for p_idx, p in enumerate(population.parameters()):
            train_writer.add_histogram('grads/%d' % p_idx, p.grad, i)
        for k, p in population.mixing_logits.items():
            train_writer.add_histogram(
                "entropy/%s" % k,
                t.distributions.Categorical(logits=p).entropy(), i)

        means = population.component_means  # (480, 5)
        dist = ((means.unsqueeze(0) - means.unsqueeze(1))**2).sum(
            dim=2).sqrt()  # (1, 480, 5,) - (480, 1, 5) = (480, 480, 5)
        train_writer.add_histogram("dist", dist, i)

        optim.step()
        sched.step()
        population.std *= 0.999
        mean_fit = raw_fitness.mean().item()
        pbar.set_description("avg fit: %.3f, std: %.3f" %
                             (mean_fit, raw_fitness.std().item()))

        all_params = population.parameters()

        t.save(all_params, 'last.t')
        if mean_fit > best_so_far:
            best_so_far = mean_fit
            t.save(all_params, 'best.t')
            util.upload_results('best.t')
Example #8
class DeepSeqNet(Module):

    def __init__(self):
        super(DeepSeqNet, self).__init__()

    def _compile(self, optimizer, learning_rate):
        self._set_optim(optimizer, learning_rate)
        self._set_scheduler()
        self._set_criterion()

    def _set_optim(self, optimizer, learning_rate):
        optimizer = optimizer.lower()
        if optimizer == "adam":
            self.optimizer = optim.Adam(self.parameters(), lr=learning_rate)
        elif optimizer == "rmsprop":
            self.optimizer = optim.RMSprop(self.parameters(), lr=learning_rate)
        else:
            self.optimizer = optim.SGD(self.parameters(), lr=learning_rate)

    def _set_scheduler(self):
        self.scheduler = MultiplicativeLR(self.optimizer, lr_lambda=(lambda x: 0.95))

    def _set_criterion(self):
        self.criterion = nn.CrossEntropyLoss()

    def forward(self, x_txt, x_num):

        txt_features = self.txt_net_forward(x_txt)
        num_features = self.num_net_forward(x_num)

        features = torch.cat((txt_features, num_features), 1)
        out_features = self.dropout(features)

        logits = self.fc(out_features)

        return logits

    def txt_net_forward(self, x_txt):
        raise NotImplementedError()

    def num_net_forward(self, x_num):
        for linear in self.linear_layers:
            x_num = self.activation_layer(linear(x_num))
        return x_num

    def fit(self, x_txt, x_num, y):

        self.train()

        self.optimizer.zero_grad()

        y_ = self.forward(x_txt, x_num)

        loss = self.criterion(y_, y)
        loss.backward()

        self.optimizer.step()

        return loss

    def evaluate(self, data_iterator):

        self.eval()

        labels, preds = [], []
        for _, (x_txt, x_num, y) in enumerate(data_iterator):

            x_txt, x_num = x_txt.t(), x_num.t()
            if torch.cuda.is_available():
                x_txt, x_num = x_txt.cuda(), x_num.cuda()

            y_ = self.forward(x_txt, x_num)
            pred = torch.argmax(y_, 1)

            preds.extend(pred.cpu().numpy())
            labels.extend(y.numpy())

        score = accuracy_score(labels, np.array(preds).flatten())

        return score

    def run_epoch(self, train_iterator, val_iterator):

        train_losses = []
        val_accuracies = []
        losses = []
        for i, (x_txt, x_num, y) in enumerate(train_iterator):

            x_txt, x_num = x_txt.t(), x_num.t()
            if torch.cuda.is_available():
                x_txt, x_num = x_txt.cuda(), x_num.cuda()
                y = y.cuda()

            loss = self.fit(x_txt, x_num, y)
            losses.append(loss.item())

            if i % 100 == 0 and i != 0:
                avg_train_loss = float(np.mean(losses))
                train_losses.append(avg_train_loss)
                losses = []

                val_accuracy = self.evaluate(val_iterator)
                print("Iteration: %4d | train loss: %3.2f | val acc.: %.2f" % ((i + 1),
                                                                               avg_train_loss * 100,
                                                                               val_accuracy * 100))

        # Run the scheduler to reduce the learning rate
        self.scheduler.step()

        return train_losses, val_accuracies
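
DeepSeqNet is abstract: a subclass has to supply txt_net_forward and define the linear_layers, activation_layer, dropout and fc attributes before calling _compile. The outline below is purely illustrative; the class name, layer sizes, and the mean-pooled embedding text branch are invented, and it is only meant to show how _compile wires in the optimizer, the MultiplicativeLR scheduler (x0.95 per epoch), and the criterion.

import torch
import torch.nn as nn

class ToyDeepSeqNet(DeepSeqNet):
    def __init__(self, vocab_size=1000, embed_dim=32, num_numeric=8, num_classes=2):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        self.linear_layers = nn.ModuleList([nn.Linear(num_numeric, 16)])
        self.activation_layer = nn.ReLU()
        self.dropout = nn.Dropout(0.2)
        self.fc = nn.Linear(embed_dim + 16, num_classes)
        # Sets self.optimizer, the MultiplicativeLR scheduler and CrossEntropyLoss.
        self._compile("adam", learning_rate=1e-3)

    def txt_net_forward(self, x_txt):
        # Assumes x_txt is a LongTensor of token ids shaped (seq_len, batch);
        # mean-pool the token embeddings into a (batch, embed_dim) tensor.
        return self.embedding(x_txt).mean(dim=0)

Each run_epoch call then trains one pass over the iterator and ends with scheduler.step(), shrinking the learning rate by 5%.
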
Example #9
class CNNModel():
    def __init__(self, args={}):
        self.args = args
        self.parse_args(args)
        self.classifier = ConvNet()
        self.optimizer = optim.Adam(self.classifier.parameters(),
                                    lr=self.lr,
                                    betas=(0.9, 0.98),
                                    eps=1e-9)
        self.loss_function = nn.CrossEntropyLoss()
        lmbda = lambda epoch: self.lr_factor
        self.lr_scheduler = MultiplicativeLR(self.optimizer, lr_lambda=lmbda)

    def parse_args(self, args):
        self.lr = args['learning_rate'] if 'learning_rate' in args else 0.001
        self.max_epoch = args['max_epoch'] if 'max_epoch' in args else 100
        self.early_stop = args['early_stop'] if 'early_stop' in args else False
        self.batch_size = args['batch_size'] if 'batch_size' in args else 64
        self.shuffle = args['shuffle'] if 'shuffle' in args else False
        self.adjust_lr = args[
            'adaptive_learning_rate'] if 'adaptive_learning_rate' in args else False
        self.early_stop_idx_limit = 10
        self.lr_factor = 0.95
        self.min_lr = 5e-6

    @staticmethod
    def adjust_learning_rate(optimizer, factor=.5, min_lr=0.00001):
        for i, param_group in enumerate(optimizer.param_groups):
            old_lr = float(param_group['lr'])
            new_lr = max(old_lr * factor, min_lr)
            param_group['lr'] = new_lr
            logger.info('adjusting learning rate from %.6f to %.6f' %
                        (old_lr, new_lr))

    def train_model(self, train_X, train_Y):
        if self.early_stop:
            best_acc = 0
            best_model = None
            early_stop_idx = 0

            train_X, dev_X = np.split(train_X, [int(len(train_X) * .8)])
            train_Y, dev_Y = np.split(train_Y, [int(len(train_Y) * .8)])

            tensor_dev_X = torch.Tensor(dev_X)
            tensor_dev_Y = torch.Tensor(dev_Y).type(torch.LongTensor)
            dev = TensorDataset(tensor_dev_X, tensor_dev_Y)
            dev_loader = DataLoader(dev,
                                    batch_size=self.batch_size,
                                    shuffle=False)

        tensor_train_X = torch.Tensor(train_X)
        tensor_train_Y = torch.Tensor(train_Y).type(torch.LongTensor)
        train = TensorDataset(tensor_train_X, tensor_train_Y)
        train_loader = DataLoader(train,
                                  batch_size=self.batch_size,
                                  shuffle=self.shuffle)
        prev_loss = np.inf

        for epoch in range(self.max_epoch):
            running_loss = 0.0
            for i, data in enumerate(train_loader):
                features, labels = data
                self.optimizer.zero_grad()
                outputs = self.classifier(
                    features.view(features.size(0), 1, 28, 28))
                loss = self.loss_function(outputs, labels)
                loss.backward()
                self.optimizer.step()

                running_loss += loss.item()

            print("epoch: ", epoch, "training loss: ", running_loss)

            if self.adjust_lr and running_loss > prev_loss:
                old_lr = self.optimizer.param_groups[0]['lr']
                self.lr_scheduler.step()
                new_lr = self.optimizer.param_groups[0]['lr']
                print("Adjusting learning rate from %.5f to %.5f" %
                      (old_lr, new_lr))

            prev_loss = running_loss

            if self.early_stop:
                with torch.no_grad():
                    dev_correct = 0.
                    dev_total = 0.
                    dev_loss = 0.
                    for data in dev_loader:
                        features, labels = data
                        outputs = self.classifier(
                            features.view(features.size(0), 1, 28, 28))
                        loss = self.loss_function(outputs, labels)
                        _, predicted = torch.max(outputs.data, 1)
                        dev_total += labels.size(0)
                        dev_correct += (predicted == labels).sum().item()
                        dev_loss += loss.item()

                    current_acc = dev_correct / dev_total

                    if current_acc > best_acc:
                        print("Best dev accuracy obtained: %.3f" % current_acc)
                        best_model = copy.deepcopy(self.classifier)
                        best_acc = current_acc
                        early_stop_idx = 0
                    else:
                        early_stop_idx += 1

                if early_stop_idx >= self.early_stop_idx_limit:
                    print("early stop triggered")
                    self.classifier = best_model
                    break

        return self

    def score(self, test_X, test_Y):
        tensor_test_X = torch.Tensor(test_X)
        tensor_test_Y = torch.Tensor(test_Y).type(torch.LongTensor)
        test = TensorDataset(tensor_test_X, tensor_test_Y)
        test_loader = DataLoader(test,
                                 batch_size=self.batch_size,
                                 shuffle=False)
        correct = 0.0
        total = 0.0
        with torch.no_grad():
            for data in test_loader:
                features, labels = data
                outputs = self.classifier(
                    features.view(features.size(0), 1, 28, 28))
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        return correct / total

    @staticmethod
    def Name():
        return "CNN"