Example #1
def train_and_test(s2smodel, train_reader, test_reader, block_size,
                   num_quantization_bits, max_epochs, epoch_size,
                   minibatch_size, progress_printer, warm_up):
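    # Note: use_attention, default_quantization_bits, and model_path are
    # module-level globals defined in the surrounding script.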
    from Sequence2Sequence import create_criterion_function, create_model_train
    model_train = create_model_train(s2smodel)
    criterion = create_criterion_function(model_train)

    # Create learner
    if block_size is not None and num_quantization_bits != default_quantization_bits:
        raise RuntimeError(
            "Block momentum cannot be used with quantization; "
            "please remove the num_quantization_bits option."
        )

    lr = 0.001 if use_attention else 0.005  # TODO: can we use the same value for both?
    local_learner = fsadagrad(
        model_train.parameters,
        lr=learning_rate_schedule([lr] * 2 + [lr / 2] * 3 + [lr / 4],
                                  UnitType.sample, epoch_size),
        momentum=momentum_as_time_constant_schedule(1100),
        gradient_clipping_threshold_per_sample=2.3,
        gradient_clipping_with_truncation=True)

    if block_size is not None:
        learner = block_momentum_distributed_learner(local_learner,
                                                     block_size=block_size)
    else:
        learner = data_parallel_distributed_learner(
            local_learner,
            num_quantization_bits=num_quantization_bits,
            distributed_after=warm_up)

    trainer = Trainer(None, criterion, learner, progress_printer)
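    # (the first Trainer argument, the model, is None here because the
    # criterion function already wraps the model)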

    train_bind = {
        criterion.arguments[0]: train_reader.streams.features,
        criterion.arguments[1]: train_reader.streams.labels
    }

    training_session(
        mb_source=train_reader,
        trainer=trainer,
        model_inputs_to_streams=train_bind,
        mb_size=minibatch_size,
        progress_frequency=epoch_size,
        checkpoint_config=CheckpointConfig(frequency=epoch_size,
                                           filename=os.path.join(
                                               model_path,
                                               "SequenceToSequence"),
                                           restore=False),
        cv_config=CrossValidationConfig(source=test_reader,
                                        mb_size=minibatch_size)).train()
Example #2
def train(reader, model, max_epochs):

    # declare the model's input dimension, so that the saved model is usable
    model.update_signature(Sequence[SparseTensor[vocab_size]])
    #model.declare_args(vocab_size)

    # criterion: (model args, labels) -> (loss, metric)
    #   here  (query, slot_labels) -> (ce, errs)
    criterion = create_criterion_function(model)

    labels = reader.streams.slot_labels
    #labels = reader.streams.intent_labels  # for intent classification

    #from cntk.logging.graph import plot
    #plot(criterion, filename=data_dir + "/model.pdf")

    # iteration parameters -- defined here because the learner schedule needs epoch_size
    epoch_size = 36000
    minibatch_size = 70
    #epoch_size = 1000 ; max_epochs = 1 # uncomment for faster testing

    # SGD parameters
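    # the momentum time constant minibatch_size / -log(0.9) makes the per-sample
    # momentum compound to 0.9 over one 70-sample minibatch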
    learner = fsadagrad(criterion.parameters,
                        lr         = learning_rate_schedule([0.003]*2+[0.0015]*12+[0.0003], UnitType.sample, epoch_size),
                        momentum   = momentum_as_time_constant_schedule(minibatch_size / -math.log(0.9)),
                        gradient_clipping_threshold_per_sample = 15,
                        gradient_clipping_with_truncation = True)

    # trainer
    trainer = Trainer(None, criterion, learner)

    # process minibatches and perform model training
    log_number_of_parameters(model) ; print()
    progress_printer = ProgressPrinter(freq=100, first=10, tag='Training') # more detailed logging
    #progress_printer = ProgressPrinter(tag='Training')

    t = 0
    for epoch in range(max_epochs):         # loop over epochs
        peek(model, epoch)                  # log some interesting info
        epoch_end = (epoch+1) * epoch_size
        while t < epoch_end:                # loop over minibatches in the epoch
            # BUGBUG: The change of minibatch_size parameter vv has no effect.
            # TODO: change all examples to this pattern; then remove this comment
            data = reader.next_minibatch(min(minibatch_size, epoch_end-t))     # fetch minibatch
            #trainer.train_minibatch(data[reader.streams.query], data[labels])  # update model with it
            trainer.train_minibatch({criterion.arguments[0]: data[reader.streams.query],
                                     criterion.arguments[1]: data[labels]})  # update model with it
            t += data[labels].num_samples                                      # count samples processed so far
            progress_printer.update_with_trainer(trainer, with_metric=True)    # log progress
        loss, metric, actual_samples = progress_printer.epoch_summary(with_metric=True)

    return loss, metric # return values from last epoch
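
create_criterion_function is defined elsewhere in the script. A minimal sketch of what it looks like in the CNTK language-understanding tutorial this example comes from (an assumed reconstruction, not necessarily the exact definition used here):

from cntk import placeholder, combine
from cntk.losses import cross_entropy_with_softmax
from cntk.metrics import classification_error

def create_criterion_function(model):
    # (model args, labels) -> (loss, metric), matching the comment above
    labels = placeholder(name='labels')
    ce = cross_entropy_with_softmax(model, labels)
    errs = classification_error(model, labels)
    return combine([ce, errs])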
Example #3
def train(train_reader, valid_reader, vocab, i2w, s2smodel, max_epochs,
          epoch_size):
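    # Note: use_attention, model_path, and input_vocab_dim are module-level
    # globals defined in the surrounding script.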

    # Note: We would like to set the signature of 's2smodel' (s2smodel.update_signature()), but that will cause
    # an error since the training criterion uses a reduced sequence axis for the labels.
    # This is because it removes the initial <s> symbol. Hence, we must leave the model
    # with unspecified input shapes and axes.

    # create the training wrapper for the s2smodel, as well as the criterion function
    model_train = create_model_train(s2smodel)
    criterion = create_criterion_function(model_train)

    # also wire in a greedy decoder so that we can properly log progress on a validation example
    # This is not used for the actual training process.
    model_greedy = create_model_greedy(s2smodel)

    # Instantiate the trainer object to drive the model training
    minibatch_size = 72
    lr = 0.001 if use_attention else 0.005  # TODO: can we use the same value for both?
    learner = fsadagrad(
        model_train.parameters,
        lr=learning_rate_schedule([lr] * 2 + [lr / 2] * 3 + [lr / 4],
                                  UnitType.sample, epoch_size),
        momentum=momentum_as_time_constant_schedule(1100),
        gradient_clipping_threshold_per_sample=2.3,
        gradient_clipping_with_truncation=True)
    trainer = Trainer(None, criterion, learner)

    # Get minibatches of sequences to train with and perform model training
    total_samples = 0
    mbs = 0
    eval_freq = 100

    # print out some useful training information
    log_number_of_parameters(model_train)
    print()
    progress_printer = ProgressPrinter(freq=30, tag='Training')
    #progress_printer = ProgressPrinter(freq=30, tag='Training', log_to_file=model_path_stem + ".log") # use this to log to file

    sparse_to_dense = create_sparse_to_dense(input_vocab_dim)
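    # (sparse_to_dense is used only for logging: it densifies the sparse one-hot
    # input so the validation sequences can be printed below)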

    for epoch in range(max_epochs):
        print("Saving model to '%s'" % model_path(epoch))
        s2smodel.save(model_path(epoch))

        while total_samples < (epoch + 1) * epoch_size:
            # get next minibatch of training data
            mb_train = train_reader.next_minibatch(minibatch_size)
            #trainer.train_minibatch(mb_train[train_reader.streams.features], mb_train[train_reader.streams.labels])
            trainer.train_minibatch({
                criterion.arguments[0]: mb_train[train_reader.streams.features],
                criterion.arguments[1]: mb_train[train_reader.streams.labels]
            })

            progress_printer.update_with_trainer(
                trainer, with_metric=True)  # log progress

            # every N MBs evaluate on a test sequence to visually show how we're doing
            if mbs % eval_freq == 0:
                mb_valid = valid_reader.next_minibatch(1)

                # run an eval on the decoder output model (i.e., don't use the ground truth)
                e = model_greedy(mb_valid[valid_reader.streams.features])
                print(format_sequences(
                    sparse_to_dense(mb_valid[valid_reader.streams.features]), i2w))
                print("->")
                print(format_sequences(e, i2w))

                # debugging attention
                if use_attention:
                    debug_attention(model_greedy,
                                    mb_valid[valid_reader.streams.features])

            total_samples += mb_train[train_reader.streams.labels].num_samples
            mbs += 1

        # log a summary of the stats for the epoch
        progress_printer.epoch_summary(with_metric=True)

    # done: save the final model
    print("Saving final model to '%s'" % model_path(max_epochs))
    s2smodel.save(model_path(max_epochs))
    print("%d epochs complete." % max_epochs)