Exemple #1
0
def train(model, model_name, train_loader, valid_loader, epochs=1000):
    # Create callbacks and checkpoints
    lrscheduler = ReduceLROnPlateau(patience=3, verbose=True)
    early_stopping = EarlyStopping(patience=10, min_delta=1e-4, verbose=True)
    model_path = './models/'

    os.makedirs(model_path, exist_ok=True)
    ckpt_best = ModelCheckpoint(model_path + 'best_' + model_name + '.torch',
                                save_best_only=True,
                                restore_best=True,
                                temporary_filename=model_path + 'temp_best_' + model_name + '.torch',
                                verbose=True)

    ckpt_last = ModelCheckpoint(model_path + 'last_' + model_name + '.torch',
                                temporary_filename=model_path + 'temp_last_' + model_name + '.torch')

    logger_path = './train_logs/'
    os.makedirs(logger_path, exist_ok=True)
    csv_logger = CSVLogger(logger_path + model_name + '.csv')

    callbacks = [lrscheduler, ckpt_best, ckpt_last, early_stopping, csv_logger]

    # Fit the model
    model.fit_generator(train_loader, valid_loader,
                        epochs=epochs, callbacks=callbacks)
    def fit(self,
            x_train,
            y_train,
            x_valid,
            y_valid,
            n_epochs=100,
            batch_size=32,
            log_filename=None,
            checkpoint_filename=None,
            with_early_stopping=True):
        """
        :param x_train: training set examples
        :param y_train: training set labels
        :param x_valid: testing set examples
        :param y_valid: testing set labels
        :param n_epochs: int, number of epoch default value 100
        :param batch_size: int, size of the batch  default value 32, must be multiple of 2
        :param log_filename: optional, to output the training informations
        :param checkpoint_filename: optional, to save the model
        :param with_early_stopping: to activate the early stopping or not
        :return: self, the model
        """
        callbacks = []
        if with_early_stopping:
            early_stopping = EarlyStopping(monitor='val_loss',
                                           patience=3,
                                           verbose=0)
            callbacks += [early_stopping]
        reduce_lr = ReduceLROnPlateau(monitor='loss',
                                      patience=2,
                                      factor=1 / 10,
                                      min_lr=1e-6)
        best_model_restore = BestModelRestore()
        callbacks += [reduce_lr, best_model_restore]
        if log_filename:
            logger = CSVLogger(log_filename,
                               batch_granularity=False,
                               separator='\t')
            callbacks += [logger]
        if checkpoint_filename:
            checkpointer = ModelCheckpoint(checkpoint_filename,
                                           monitor='val_loss',
                                           save_best_only=True)
            callbacks += [checkpointer]

            # self.model.fit(x_train, y_train, x_valid, y_valid,
            #                batch_size=batch_size, epochs=n_epochs,
            #                callbacks=callbacks)
            nb_steps_train, nb_step_valid = int(
                len(x_train) / batch_size), int(len(x_valid) / batch_size)
            self.model.fit_generator(
                generator(x_train, y_train, batch_size),
                steps_per_epoch=nb_steps_train,
                valid_generator=generator(x_valid, y_valid, batch_size),
                validation_steps=nb_step_valid,
                epochs=n_epochs,
                callbacks=callbacks,
            )
            return self
Exemple #3
0
 def test_reduce_lr_on_plateau_integration(self):
     train_gen = some_data_generator(OptimizerCheckpointTest.batch_size)
     valid_gen = some_data_generator(OptimizerCheckpointTest.batch_size)
     reduce_lr = ReduceLROnPlateau(monitor='loss', patience=3)
     checkpointer = LRSchedulerCheckpoint(reduce_lr,
                                          self.checkpoint_filename,
                                          period=1)
     self.model.fit_generator(train_gen,
                              valid_gen,
                              epochs=OptimizerCheckpointTest.epochs,
                              steps_per_epoch=5,
                              callbacks=[checkpointer])
Exemple #4
0
    def fit(self,
            meta_train,
            meta_valid,
            n_epochs=100,
            steps_per_epoch=100,
            log_filename=None,
            checkpoint_filename=None,
            tboard_folder=None):
        if hasattr(self.model, 'is_eval'):
            self.model.is_eval = False
        self.is_eval = False
        self.steps_per_epoch = steps_per_epoch
        callbacks = [
            EarlyStopping(patience=10, verbose=False),
            ReduceLROnPlateau(patience=2,
                              factor=1 / 2,
                              min_lr=1e-6,
                              verbose=True),
            BestModelRestore()
        ]
        if log_filename:
            callbacks += [
                CSVLogger(log_filename,
                          batch_granularity=False,
                          separator='\t')
            ]
        if checkpoint_filename:
            callbacks += [
                ModelCheckpoint(checkpoint_filename,
                                monitor='val_loss',
                                save_best_only=True,
                                temporary_filename=checkpoint_filename +
                                'temp')
            ]

        if tboard_folder is not None:
            self.writer = SummaryWriter(tboard_folder)

        self.fit_generator(meta_train,
                           meta_valid,
                           epochs=n_epochs,
                           steps_per_epoch=steps_per_epoch,
                           validation_steps=steps_per_epoch,
                           callbacks=callbacks,
                           verbose=True)
        self.is_fitted = True
        return self
Exemple #5
0
 def test_reduce_lr_on_plateau_integration(self):
     reduce_lr = ReduceLROnPlateau(monitor='loss', patience=3)
     self._fit_with_callback_integration(reduce_lr)
Exemple #6
0
 def test_reduce_lr_checkpoints(self):
     reduce_lr = ReduceLROnPlateau(monitor='loss', patience=3)
     checkpointer = LRSchedulerCheckpoint(reduce_lr,
                                          self.checkpoint_filename,
                                          period=1)
     self._test_checkpointer(checkpointer, reduce_lr)
Exemple #7
0
def main():
    randomhash = ''.join(str(time.time()).split('.'))

    parser = argparse.ArgumentParser(description='PyTorch PennTreeBank RNN/LSTM Language Model')
    parser.add_argument('--data', type=str, default='data/penn/', help='location of the data corpus')
    parser.add_argument('--model', type=str, default='LSTM', help='type of recurrent net (LSTM, QRNN, GRU)')
    parser.add_argument('--emsize', type=int, default=400, help='size of word embeddings')
    parser.add_argument('--nhid', type=int, default=1150, help='number of hidden units per layer')
    parser.add_argument('--nlayers', type=int, default=3, help='number of layers')
    parser.add_argument('--lr', type=float, default=30, help='initial learning rate')
    parser.add_argument('--clip', type=float, default=0.25, help='gradient clipping')
    parser.add_argument('--epochs', type=int, default=8000, help='upper epoch limit')
    parser.add_argument('--batch_size', type=int, default=80, metavar='N', help='batch size')
    parser.add_argument('--bptt', type=int, default=70, help='sequence length')
    parser.add_argument('--dropout', type=float, default=0.4, help='dropout applied to layers (0 = no dropout)')
    parser.add_argument('--dropouth', type=float, default=0.3, help='dropout for rnn layers (0 = no dropout)')
    parser.add_argument('--dropouti', type=float, default=0.65, help='dropout for input embedding layers (0 = no dropout)')
    parser.add_argument('--dropoute', type=float, default=0.1, help='dropout to remove words from embedding layer (0 = no dropout)')
    parser.add_argument('--wdrop', type=float, default=0.5, help='amount of weight dropout to apply to the RNN hidden to hidden matrix')
    parser.add_argument('--seed', type=int, default=1111, help='random seed')
    parser.add_argument('--nonmono', type=int, default=5, help='random seed')
    parser.add_argument('--cuda', action='store_false', help='use CUDA')
    parser.add_argument('--log-interval', type=int, default=200, metavar='N', help='report interval')
    parser.add_argument('--save', type=str,  default=randomhash+'.pt', help='path to save the final model')
    parser.add_argument('--alpha', type=float, default=2, help='alpha L2 regularization on RNN activation (alpha = 0 means no regularization)')
    parser.add_argument('--beta', type=float, default=1, help='beta slowness regularization applied on RNN activiation (beta = 0 means no regularization)')
    parser.add_argument('--wdecay', type=float, default=1.2e-6, help='weight decay applied to all weights')
    parser.add_argument('--resume', type=str,  default='', help='path of model to resume')
    parser.add_argument('--optimizer', type=str,  default='sgd', help='optimizer to use (sgd, adam)')
    parser.add_argument('--when', nargs="+", type=int, default=[-1], help='When (which epochs) to divide the learning rate by 10 - accepts multiple')
    args = parser.parse_args()
    args.tied = True

    # Set the random seed manually for reproducibility.
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if torch.cuda.is_available():
        if not args.cuda:
            print("WARNING: You have a CUDA device, so you should probably run with --cuda")
        else:
            torch.cuda.manual_seed(args.seed)

    ###############################################################################
    # Load data
    ###############################################################################
    fn = 'corpus.{}.data'.format(hashlib.md5(args.data.encode()).hexdigest())
    if os.path.exists(fn):
        print('Loading cached dataset...')
        corpus = torch.load(fn)
    else:
        print('Producing dataset...')
        corpus = data.Corpus(args.data)
        torch.save(corpus, fn)

    eval_batch_size = 20
    test_batch_size = 1
    train_data = batchify(corpus.train, args.batch_size, args)
    val_data = batchify(corpus.valid, eval_batch_size, args)
    test_data = batchify(corpus.test, test_batch_size, args)

    train_loader = SentenceLoader(train_data, args.bptt)
    valid_loader = SentenceLoader(val_data, args.bptt, False)
    test_loader = SentenceLoader(test_data, args.bptt, False)

    ntokens = len(corpus.dictionary)
    model = m.RNNModel(
        args.model,
        ntokens,
        args.emsize,
        args.nhid,
        args.nlayers,
        args.dropout,
        args.dropouth,
        args.dropouti,
        args.dropoute,
        args.wdrop,
        args.tied,
        args.alpha,
        args.beta,
        args.batch_size
    )

    if args.model == 'QRNN': model.reset()

    ###
    params = list(model.parameters())
    total_params = sum(x.size()[0] * x.size()[1] if len(x.size()) > 1 else x.size()[0] for x in params if x.size())
    print('Args:', args)
    print('Model total parameters:', total_params)

    optimizer = None
    if args.optimizer == 'sgd':
        optimizer = torch.optim.SGD(params, lr=args.lr, weight_decay=args.wdecay)
    if args.optimizer == 'adam':
        optimizer = torch.optim.Adam(params, lr=args.lr, weight_decay=args.wdecay)

    device = None
    device_id = 0
    if torch.cuda.is_available():
        torch.cuda.set_device(device_id) # Fix bug where memory is allocated on GPU0 when ask to take GPU1.
        device = torch.device('cuda:%d' % device_id)
        logging.info("Training on GPU %d" % device_id)
    else:
        logging.info("Training on CPU")

    dataset = args.data.split('/')[-1]
    model_name = "AWD_{}_{}".format(args.model, dataset)
    expt_name = './expt_{}'.format(model_name)
    expt_dir = get_experiment_directory(expt_name)
    expt = PytouneExperiment(
        expt_dir,
        model,
        device=device,
        optimizer=optimizer,
        monitor_metric='val_loss',
        monitor_mode='min'
    )

    callbacks = [
        HiddenInitCallback(args.batch_size, eval_batch_size),
        HiddenRepackagingCallback(),
        ClipNorm(params, args.clip),
        # EvaluationCallback(),
        ASGDOptimizerSwitchCallback(args),
        ReduceLROnPlateau(monitor='val_loss', mode='min', patience=20, factor=0.5, threshold_mode='abs', threshold=1e-3, verbose=True),
        AdaptativeLRSchedulerCallback(train_loader),
    ]

    try:
        expt.train(train_loader, valid_loader, callbacks=callbacks, seed=args.seed)
    except KeyboardInterrupt:
        print('-' * 89)
        print('Exiting from training early')

    print("Testing on test set...")
    expt.test(test_loader)