Ejemplo n.º 1
0
def train(config):
    """Train an RNN or LSTM on the palindrome task, then measure its accuracy.

    Args:
        config: Object exposing the attributes read below: ``model_type``
            ('RNN' or 'LSTM'), ``device``, ``input_length``, ``input_dim``,
            ``num_hidden``, ``num_classes``, ``batch_size``,
            ``learning_rate``, ``max_norm`` and ``train_steps``.

    Returns:
        None. Progress is printed every 10 steps and a final accuracy over a
        fresh batch of 1000 samples is printed at the end.
    """
    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the model that we are going to use
    if config.model_type == 'LSTM':
        model = LSTM(
            config.input_length,
            config.input_dim,
            config.num_hidden,
            config.num_classes,
            config.device,
        )
    elif config.model_type == 'RNN':
        model = VanillaRNN(
            config.input_length,
            config.input_dim,
            config.num_hidden,
            config.num_classes,
            config.device,
        )
    else:
        # Only reachable when assertions are stripped (python -O).
        print('Your model type input is neither \'RNN\' or \'LSTM\'')
        return

    # The batches are moved to `device` below, so the parameters must live
    # there as well. This must happen before the optimizer is created.
    model.to(device)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(),
                                    lr=config.learning_rate)

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)

        # Only for time measurement of step through network
        t1 = time.time()

        optimizer.zero_grad()
        output = model.forward(batch_inputs)

        loss = criterion(output, batch_targets)
        loss.backward()

        # Gradient clipping: rescale all gradients by a common factor so that
        # their global norm does not exceed max_norm. In practice this places
        # a limit on the size of the parameter updates. It has to run after
        # backward() (gradients exist) and before step() (they are applied).
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)

        optimizer.step()

        with torch.no_grad():
            # argmax over the class axis of the raw logits. A softmax is not
            # needed for the prediction, and normalising over dim=0 (the
            # batch axis) would mix samples and can change the argmax.
            pred = output.argmax(dim=1)
            accuracy = (pred == batch_targets).float().mean().item()

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % 10 == 0:
            print(
                "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    config.train_steps, config.batch_size,
                    examples_per_second, accuracy, loss.item()))

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')

    # Test memory capacity on one fresh batch of 1000 samples.
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, 1000, num_workers=1)
    batch_inputs, batch_targets = next(iter(data_loader))
    batch_inputs = batch_inputs.to(device)
    batch_targets = batch_targets.to(device)

    # No gradients are needed for evaluation.
    with torch.no_grad():
        output = model.forward(batch_inputs)
        pred = output.argmax(dim=1)
        accuracy = (pred == batch_targets).float().mean().item()

    print('T:', config.input_length + 1)
    print("Final accuracy:", accuracy)
Ejemplo n.º 2
0
def run(model_type, input_length, input_dim, num_classes, num_hidden,
        batch_size, learning_rate, train_steps, max_norm, device):
    """Train an RNN or LSTM on the palindrome task and return the last accuracy.

    Args:
        model_type: 'RNN' or 'LSTM'; selects VanillaRNN or LSTM.
        input_length: Passed to the model; the dataset is built with
            ``input_length + 1``.
        input_dim: Passed to the model constructor.
        num_classes: Passed to the model constructor.
        num_hidden: Passed to the model constructor.
        batch_size: Batch size for the data loader (also passed to the model).
        learning_rate: Learning rate for RMSprop.
        train_steps: The loop stops once the step index equals this value.
        max_norm: Maximum global gradient norm used for clipping.
        device: Anything accepted by ``torch.device``.

    Returns:
        The training accuracy (Python float) of the last processed batch.
        NOTE(review): assumes the data loader yields at least one batch;
        otherwise ``accuracy`` is never assigned.
    """
    assert model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    device = torch.device(device)

    # Initialize the model that we are going to use
    model_pars = [
        input_length, input_dim, num_hidden, num_classes, batch_size, device
    ]
    model = LSTM(*model_pars) \
        if model_type == 'LSTM' \
        else VanillaRNN(*model_pars)
    model.to(device)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(input_length + 1)
    data_loader = DataLoader(dataset, batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(), lr=learning_rate)

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # The model lives on `device`, so the batch has to be moved there too.
        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)

        # Only for time measurement of step through network
        t1 = time.time()

        optimizer.zero_grad()
        ys = model.forward(batch_inputs)
        loss = criterion(ys, batch_targets)
        loss.backward()

        # Gradient clipping: rescale the gradients so their global norm is at
        # most max_norm, which bounds the size of the parameter update. It
        # must run after backward() and before step(); right after
        # zero_grad() there is nothing to clip.
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=max_norm)

        optimizer.step()

        predictions = ys.argmax(dim=-1)
        accuracy = (batch_targets == predictions).float().mean()

        # Just for time measurement
        t2 = time.time()
        examples_per_second = batch_size / float(t2 - t1)

        if step % 10 == 0:

            print(
                "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    train_steps, batch_size, examples_per_second,
                    accuracy.item(), loss.item()))

        if step == train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
    return accuracy.item()
Ejemplo n.º 3
0
def train(config):
    """Plot per-timestep hidden-state gradient norms for an LSTM and an RNN.

    Both models take a single optimisation step on the first batch of the
    palindrome data loader. The norm of ``h.grad`` is then collected for every
    entry of each model's ``all_h`` and the two curves are shown in one
    matplotlib figure.

    Args:
        config: Object exposing ``model_type``, ``input_length``,
            ``input_dim``, ``num_hidden``, ``num_classes``, ``batch_size``
            and ``learning_rate``.
    """
    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    def acc(predictions, targets):
        """Return the fraction of rows whose argmax over dim 1 equals the target."""
        hits = predictions.argmax(dim=1) == targets
        return hits.float().mean().item()

    # Initialize the dataset and data loader (note the +1)
    palindromes = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(palindromes, config.batch_size, num_workers=1)

    # Setup the loss, the two models and one optimizer per model
    criterion = torch.nn.CrossEntropyLoss()
    lstm = LSTM(config.input_length, config.input_dim, config.num_hidden,
                config.num_classes)
    rnn = VanillaRNN(config.input_length, config.input_dim, config.num_hidden,
                     config.num_classes, device)

    optimizer_lstm = torch.optim.RMSprop(lstm.parameters(),
                                         lr=config.learning_rate)
    optimizer_rnn = torch.optim.RMSprop(rnn.parameters(),
                                        lr=config.learning_rate)

    def update(network, optimizer, inputs, targets):
        """Run forward, backward and one optimizer step for `network`."""
        outputs = network.forward(inputs)
        optimizer.zero_grad()
        batch_loss = criterion(outputs, targets)
        batch_loss.backward()
        optimizer.step()

    def hidden_grad_norms(network):
        """Return the gradient norm of every entry in `network.all_h`."""
        return [h.grad.norm().item() for h in network.all_h]

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        print("step", step)

        update(lstm, optimizer_lstm, batch_inputs, batch_targets)
        update(rnn, optimizer_rnn, batch_inputs, batch_targets)

        lstm_norms = hidden_grad_norms(lstm)
        rnn_norms = hidden_grad_norms(rnn)

        positions = list(range(1, config.input_length + 1))
        plt.figure(figsize=(15, 6))
        plt.plot(positions, rnn_norms, label="rnn")
        plt.plot(positions, lstm_norms, label="lstm")
        plt.legend()
        plt.xlabel("sequence value")
        plt.ylabel("gradient norm")

        plt.show()

        # Only the first batch is analysed.
        break

    print('Done training.')
Ejemplo n.º 4
0
class PalindromeExperiment(PytorchExperiment):
    """Experiment that trains an RNN or LSTM on the palindrome task.

    Hyper-parameters are read from ``self.config`` through the module-level
    ``flags`` sequence of names. NOTE(review): the unpacking order used in
    ``setup`` and ``train`` must match the order of ``flags`` -- confirm
    against its definition.
    """

    def setup(self):
        """Build the model, data loader, loss and optimizer from the config."""
        self.save_checkpoint(name='setup')
        (model_type, input_length, input_dim, num_classes, num_hidden,
         batch_size, learning_rate, train_steps, max_norm,
         wanted_device) = itemgetter(*flags)(vars(self.config))

        assert model_type in ('RNN', 'LSTM')

        # Initialize the device which to run the model on
        # TODO: debug CUDA issues
        device = torch.device(wanted_device)

        # Initialize the model that we are going to use
        model_pars = [input_length, input_dim, num_hidden, num_classes,
                      batch_size, device]
        self.model = LSTM(*model_pars) \
            if model_type == 'LSTM' \
            else VanillaRNN(*model_pars)
        self.model.to(device)

        # Initialize the dataset and data loader (note the +1)
        dataset = PalindromeDataset(input_length + 1)
        self.data_loader = DataLoader(dataset, batch_size, num_workers=1)

        # Setup the loss and optimizer
        self.criterion = torch.nn.CrossEntropyLoss()
        self.optimizer = torch.optim.RMSprop(self.model.parameters(),
                                             lr=learning_rate)

    # TODO: plot accuracy over input_length
    # TODO: increase learning_rate over input_length
    # TODO: compare result with RNN
    def train(self, epoch):
        """Run the training loop, logging and checkpointing every 100 steps.

        Args:
            epoch: Not used by this method.

        Per-step loss and accuracy are buffered and handed to ``write_csv``
        every 100 steps and once more after the loop. Scalars are written to
        a ``SummaryWriter`` at 'part1/train'.
        """
        (model_type, input_length, input_dim, num_classes, num_hidden,
         batch_size, learning_rate, train_steps, max_norm,
         device) = itemgetter(*flags)(vars(self.config))

        with SummaryWriter('part1/train') as w:
            results = []
            for step, (batch_inputs, batch_targets) in enumerate(self.data_loader):

                # Only for time measurement of step through network
                t1 = time.time()

                self.optimizer.zero_grad()

                # Move to device with the dtypes the model and loss expect.
                # as_tensor avoids the copy-construct warning that
                # torch.tensor() raises when handed an existing tensor.
                inputs = torch.as_tensor(batch_inputs, dtype=torch.float,
                                         device=device)
                targets = torch.as_tensor(batch_targets, dtype=torch.long,
                                          device=device)

                ys = self.model.forward(inputs)
                loss = self.criterion(ys, targets)
                loss.backward()

                # Clip the gradients so gradient explosion won't let us
                # overshoot the minimum. This must happen after backward()
                # and before step(); right after zero_grad() there is
                # nothing to clip.
                # https://www.quora.com/What-is-gradient-clipping-and-why-is-it-necessary
                torch.nn.utils.clip_grad_norm_(self.model.parameters(),
                                               max_norm=max_norm)

                self.optimizer.step()

                predictions = ys.argmax(dim=-1)
                accuracy = (targets == predictions).float().mean()

                # Just for time measurement
                t2 = time.time()
                examples_per_second = batch_size / float(t2 - t1)

                # Plain Python floats: usable for CSV rows, scalar logging
                # and string formatting alike.
                stats = {'loss': loss.item(), 'accuracy': accuracy.item()}
                results.append({'step': step, **stats})

                if step % 100 == 0:
                    w.add_scalars('metrics', stats, step // 10)

                    # # TODO: check why this is slow!
                    # for k, v in stats.items():
                    #     self.add_result(value=v, name=f'train_{k}', counter=step / train_steps, label=k)

                    self.elog.print("elog [{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                        "Accuracy = {:.2f}, Loss = {:.3f}".format(
                            datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                            train_steps, batch_size, examples_per_second,
                            stats['accuracy'], stats['loss']
                    ))

                    self.save_checkpoint(name='train', n_iter=step)

                    # Hand the buffered rows to write_csv and keep whatever
                    # it returns as the new buffer.
                    results = write_csv(results, self.config)

                if step == train_steps:
                    # If you receive a PyTorch data-loader error, check this bug report:
                    # https://github.com/pytorch/pytorch/pull/9655
                    break

        print('Done training.')
        results = write_csv(results, self.config)

    def validate(self, epoch):
        """No validation is performed for this experiment."""
        pass