Example 1
def train_lm():
    data = DataReader(token_to_id_path, segment_sepparator)

    # Create model nodes for the source and target inputs
    input_sequence, label_sequence = create_inputs(data.vocab_dim)

    # Create the model. It has three output nodes:
    # z: the input to softmax, which provides the latent representation of the next token
    # cross_entropy: the training criterion
    # error: a binary indicator of whether the model predicts the correct token
    z, cross_entropy, error = create_model(input_sequence, label_sequence, data.vocab_dim, hidden_dim)

    # For measurement we use the (built-in) full softmax.
    full_ce = C.cross_entropy_with_softmax(z, label_sequence)

    # print out some useful training information
    log_number_of_parameters(z) ; print()
    
    # Run the training loop
    num_trained_samples = 0
    num_trained_samples_since_last_report = 0

    # Instantiate the trainer object to drive the model training
    lr_schedule = learning_rate_schedule(learning_rate, UnitType.sample)
    momentum_schedule = momentum_as_time_constant_schedule(momentum_as_time_constant)
    gradient_clipping_with_truncation = True
    learner = momentum_sgd(z.parameters, lr_schedule, momentum_schedule,
                            gradient_clipping_threshold_per_sample=clipping_threshold_per_sample,
                            gradient_clipping_with_truncation=gradient_clipping_with_truncation)
    trainer = Trainer(z, (cross_entropy, error), learner)
  
    for epoch_count in range(num_epochs):
        for features, labels, token_count in data.minibatch_generator(train_file_path, sequence_length, sequences_per_batch):
            arguments = ({input_sequence : features, label_sequence : labels})

            t_start = timeit.default_timer()
            trainer.train_minibatch(arguments)
            t_end = timeit.default_timer()

            samples_per_second = token_count / (t_end - t_start)

            # Print progress report every num_samples_between_progress_report samples
            if num_trained_samples_since_last_report >= num_samples_between_progress_report or num_trained_samples == 0:
                av_ce = average_cross_entropy(full_ce, input_sequence, label_sequence, data)
                print_progress(samples_per_second, av_ce, num_trained_samples, t_start)
                num_trained_samples_since_last_report = 0

            num_trained_samples += token_count
            num_trained_samples_since_last_report += token_count

        # after each epoch save the model
        model_filename = "models/lm_epoch%d.dnn" % epoch_count
        z.save_model(model_filename)
        print("Saved model to '%s'" % model_filename)
Example 2
def train_lm(training_file, epochs, max_num_minibatches):

    # load the data and vocab
    data, char_to_ix, ix_to_char, data_size, vocab_dim = load_data_and_vocab(training_file)

    # Create the source and target input variables for the model
    input_sequence, label_sequence = create_inputs(vocab_dim)

    # create the model
    model = create_model(vocab_dim)
    
    # and apply it to the input sequence    
    z = model(input_sequence)

    # set up the criteria (loss and metric)
    ce = cross_entropy_with_softmax(z, label_sequence)
    errs = classification_error(z, label_sequence)

    # Instantiate the trainer object to drive the model training
    lr_per_sample = learning_rate_schedule(0.001, UnitType.sample)
    momentum_time_constant = momentum_as_time_constant_schedule(1100)
    clipping_threshold_per_sample = 5.0
    gradient_clipping_with_truncation = True
    learner = momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant,
                           gradient_clipping_threshold_per_sample=clipping_threshold_per_sample,
                           gradient_clipping_with_truncation=gradient_clipping_with_truncation)
    progress_printer = ProgressPrinter(freq=100, tag='Training')
    trainer = Trainer(z, (ce, errs), learner, progress_printer)

    sample_freq = 1000
    minibatches_per_epoch = min(data_size // minibatch_size, max_num_minibatches // epochs)

    # print out some useful training information
    log_number_of_parameters(z)
    print ("Running %d epochs with %d minibatches per epoch" % (epochs, minibatches_per_epoch))
    print()

    for e in range(0, epochs):
        # Specify the mapping of input variables in the model to actual minibatch data to be trained with
        # If it's the start of the data, we specify that we are looking at a new sequence (True)
        mask = [True]
        for b in range(0, minibatches_per_epoch):
            # get the data            
            features, labels = get_data(b, minibatch_size, data, char_to_ix, vocab_dim)
            arguments = ({input_sequence : features, label_sequence : labels}, mask)
            mask = [False] 
            trainer.train_minibatch(arguments)

            global_minibatch = e*minibatches_per_epoch + b
            if global_minibatch % sample_freq == 0:
                print(sample(z, ix_to_char, vocab_dim, char_to_ix))

        model_filename = "models/shakespeare_epoch%d.dnn" % (e+1)
        z.save_model(model_filename)
        print("Saved model to '%s'" % model_filename)
Example 3
def train_fast_rcnn(debug_output=False):
    if debug_output:
        print("Storing graphs and intermediate models to %s." % os.path.join(abs_path, "Output"))

    # Create the minibatch source
    minibatch_source = create_mb_source(image_height, image_width, num_channels,
                                        num_classes, num_rois, base_path, "train")

    # Input variables denoting features, rois and label data
    image_input = input_variable((num_channels, image_height, image_width))
    roi_input   = input_variable((num_rois, 4))
    label_input = input_variable((num_rois, num_classes))

    # define mapping from reader streams to network inputs
    input_map = {
        image_input: minibatch_source[features_stream_name],
        roi_input: minibatch_source[roi_stream_name],
        label_input: minibatch_source[label_stream_name]
    }

    # Instantiate the Fast R-CNN prediction model and loss function
    frcn_output = frcn_predictor(image_input, roi_input, num_classes)
    ce = cross_entropy_with_softmax(frcn_output, label_input, axis=1)
    pe = classification_error(frcn_output, label_input, axis=1)
    if debug_output:
        plot(frcn_output, os.path.join(abs_path, "Output", "graph_frcn.png"))

    # Set learning parameters
    l2_reg_weight = 0.0005
    lr_per_sample = [0.00001] * 10 + [0.000001] * 5 + [0.0000001]
    lr_schedule = learning_rate_schedule(lr_per_sample, unit=UnitType.sample)
    mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant)

    # Instantiate the trainer object
    learner = momentum_sgd(frcn_output.parameters, lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight)
    trainer = Trainer(frcn_output, (ce, pe), learner)

    # Get minibatches of images and perform model training
    print("Training Fast R-CNN model for %s epochs." % max_epochs)
    log_number_of_parameters(frcn_output)
    progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)
    for epoch in range(max_epochs):       # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = minibatch_source.next_minibatch(min(mb_size, epoch_size-sample_count), input_map=input_map)
            trainer.train_minibatch(data)                                    # update model with it
            sample_count += trainer.previous_minibatch_sample_count          # count samples processed so far
            progress_printer.update_with_trainer(trainer, with_metric=True)  # log progress

        progress_printer.epoch_summary(with_metric=True)
        if debug_output:
            frcn_output.save(os.path.join(abs_path, "Output", "frcn_py_%s.model" % (epoch+1)))

    return frcn_output
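train_fast_rcnn also pulls its configuration from module-level globals (image geometry, ROI count, stream names, schedule constants). The block below sketches plausible definitions; all values are placeholders, and the stream names simply have to match whatever create_mb_source registers.

import os

abs_path  = os.path.dirname(os.path.abspath(__file__))
base_path = os.path.join(abs_path, "Data")       # placeholder data location

# Stream names used to map reader streams to the three inputs
features_stream_name = "features"
roi_stream_name      = "rois"
label_stream_name    = "roiLabels"

# Placeholder geometry and training settings
image_height, image_width, num_channels = 1000, 1000, 3
num_classes = 17
num_rois    = 100
momentum_time_constant = 10
max_epochs  = 15
epoch_size  = 25
mb_size     = 1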
Example 4
def train_model(base_model_file, feature_node_name, last_hidden_node_name,
                image_width, image_height, num_channels, num_classes, train_map_file,
                num_epochs, max_images=-1, freeze=False):
    epoch_size = sum(1 for line in open(train_map_file))
    if max_images > 0:
        epoch_size = min(epoch_size, max_images)

    # Create the minibatch source and input variables
    minibatch_source = create_mb_source(train_map_file, image_width, image_height, num_channels, num_classes)
    image_input = input_variable((num_channels, image_height, image_width))
    label_input = input_variable(num_classes)

    # Define mapping from reader streams to network inputs
    input_map = {
        image_input: minibatch_source[features_stream_name],
        label_input: minibatch_source[label_stream_name]
    }

    # Instantiate the transfer learning model and loss function
    tl_model = create_model(base_model_file, feature_node_name, last_hidden_node_name, num_classes, image_input, freeze)
    ce = cross_entropy_with_softmax(tl_model, label_input)
    pe = classification_error(tl_model, label_input)

    # Instantiate the trainer object
    lr_schedule = learning_rate_schedule(lr_per_mb, unit=UnitType.minibatch)
    mm_schedule = momentum_schedule(momentum_per_mb)
    learner = momentum_sgd(tl_model.parameters, lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight)
    trainer = Trainer(tl_model, (ce, pe), learner)

    # Get minibatches of images and perform model training
    print("Training transfer learning model for {0} epochs (epoch_size = {1}).".format(num_epochs, epoch_size))
    log_number_of_parameters(tl_model)
    progress_printer = ProgressPrinter(tag='Training', num_epochs=num_epochs)
    for epoch in range(num_epochs):       # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = minibatch_source.next_minibatch(min(mb_size, epoch_size-sample_count), input_map=input_map)
            trainer.train_minibatch(data)                                    # update model with it
            sample_count += trainer.previous_minibatch_sample_count          # count samples processed so far
            progress_printer.update_with_trainer(trainer, with_metric=True)  # log progress
            if sample_count % (100 * mb_size) == 0:
                print ("Processed {0} samples".format(sample_count))

        progress_printer.epoch_summary(with_metric=True)

    return tl_model
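Besides its explicit parameters, train_model reads the stream names, the learning-rate and momentum settings, and mb_size as globals. The sketch below shows hypothetical values together with an illustrative call; the model file, node names and dataset paths are placeholders, not guaranteed to match a real model.

# Placeholder settings referenced as globals inside train_model()
features_stream_name = "features"
label_stream_name    = "labels"
lr_per_mb       = [0.2] * 10 + [0.02]   # per-minibatch learning-rate schedule (illustrative)
momentum_per_mb = 0.9
l2_reg_weight   = 0.0005
mb_size         = 50

# Hypothetical call: fine-tune a pretrained network on a 10-class dataset
tl_model = train_model("ResNet_18.model", "features", "z.x",
                       image_width=224, image_height=224, num_channels=3,
                       num_classes=10, train_map_file="train_map.txt",
                       num_epochs=20, freeze=True)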
Example 5
def train_lm(training_file, max_num_minibatches):

    # load the data and vocab
    data, char_to_ix, ix_to_char, data_size, vocab_dim = load_data_and_vocab(
        training_file)

    # Create the source and target input variables for the model
    input_sequence, label_sequence = create_inputs(vocab_dim)

    # create the model
    model = create_model(vocab_dim)

    # and apply it to the input sequence
    z = model(input_sequence)

    # set up the criteria (loss and metric)
    ce = cross_entropy_with_softmax(z, label_sequence)
    errs = classification_error(z, label_sequence)

    # Instantiate the trainer object to drive the model training
    lr_per_sample = learning_rate_schedule(0.001, UnitType.sample)
    momentum_time_constant = momentum_as_time_constant_schedule(1100)
    clipping_threshold_per_sample = 5.0
    gradient_clipping_with_truncation = True
    learner = momentum_sgd(
        z.parameters,
        lr_per_sample,
        momentum_time_constant,
        gradient_clipping_threshold_per_sample=clipping_threshold_per_sample,
        gradient_clipping_with_truncation=gradient_clipping_with_truncation)
    trainer = Trainer(z, (ce, errs), learner)

    sample_freq = 1000
    epochs = 50
    minibatches_per_epoch = int((data_size / minibatch_size))
    minibatches = min(epochs * minibatches_per_epoch, max_num_minibatches)

    # print out some useful training information
    log_number_of_parameters(z)
    print()
    progress_printer = ProgressPrinter(freq=100, tag='Training')

    e = 0
    p = 0
    for i in range(0, minibatches):

        if p + minibatch_size + 1 >= data_size:
            p = 0
            e += 1
            model_filename = "models/shakespeare_epoch%d.dnn" % e
            z.save(model_filename)
            print("Saved model to '%s'" % model_filename)

        # get the data
        features, labels = get_data(p, minibatch_size, data, char_to_ix,
                                    vocab_dim)

        # Specify the mapping of input variables in the model to actual minibatch data to be trained with
        # If it's the start of the data, we specify that we are looking at a new sequence (True)
        mask = [False]
        if p == 0:
            mask = [True]
        arguments = ({input_sequence: features, label_sequence: labels}, mask)
        trainer.train_minibatch(arguments)

        progress_printer.update_with_trainer(trainer,
                                             with_metric=True)  # log progress

        if i % sample_freq == 0:
            print(sample(z, ix_to_char, vocab_dim, char_to_ix))

        p += minibatch_size

    # Do a final save of the model
    model_filename = "models/shakespeare_epoch%d.dnn" % e
    z.save(model_filename)
Example 6
def train_model(base_model_file,
                feature_node_name,
                last_hidden_node_name,
                image_width,
                image_height,
                num_channels,
                num_classes,
                train_map_file,
                num_epochs,
                max_images=-1,
                freeze=False):
    epoch_size = sum(1 for line in open(train_map_file))
    if max_images > 0:
        epoch_size = min(epoch_size, max_images)

    # Create the minibatch source and input variables
    minibatch_source = create_mb_source(train_map_file, image_width,
                                        image_height, num_channels,
                                        num_classes)
    image_input = input_variable((num_channels, image_height, image_width))
    label_input = input_variable(num_classes)

    # Define mapping from reader streams to network inputs
    input_map = {
        image_input: minibatch_source[features_stream_name],
        label_input: minibatch_source[label_stream_name]
    }

    # Instantiate the transfer learning model and loss function
    tl_model = create_model(base_model_file, feature_node_name,
                            last_hidden_node_name, num_classes, image_input,
                            freeze)
    ce = cross_entropy_with_softmax(tl_model, label_input)
    pe = classification_error(tl_model, label_input)

    # Instantiate the trainer object
    lr_schedule = learning_rate_schedule(lr_per_mb, unit=UnitType.minibatch)
    mm_schedule = momentum_schedule(momentum_per_mb)
    learner = momentum_sgd(tl_model.parameters,
                           lr_schedule,
                           mm_schedule,
                           l2_regularization_weight=l2_reg_weight)
    trainer = Trainer(tl_model, (ce, pe), learner)

    # Get minibatches of images and perform model training
    print("Training transfer learning model for {0} epochs (epoch_size = {1}).".format(
        num_epochs, epoch_size))
    log_number_of_parameters(tl_model)
    progress_printer = ProgressPrinter(tag='Training', num_epochs=num_epochs)
    for epoch in range(num_epochs):  # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = minibatch_source.next_minibatch(min(mb_size, epoch_size - sample_count),
                                                   input_map=input_map)
            trainer.train_minibatch(data)  # update model with it
            sample_count += trainer.previous_minibatch_sample_count  # count samples processed so far
            progress_printer.update_with_trainer(
                trainer, with_metric=True)  # log progress
            if sample_count % (100 * mb_size) == 0:
                print("Processed {0} samples".format(sample_count))

        progress_printer.epoch_summary(with_metric=True)

    return tl_model
Example 7
def train_lm():
    data = DataReader(token_to_id_path, segment_sepparator)

    # Create model nodes for the source and target inputs
    input_sequence, label_sequence = create_inputs(data.vocab_dim)

    # Create the model. It has three output nodes:
    # z: the input to softmax, which provides the latent representation of the next token
    # cross_entropy: the training criterion
    # error: a binary indicator of whether the model predicts the correct token
    z, cross_entropy, error = create_model(input_sequence, label_sequence,
                                           data.vocab_dim, hidden_dim)

    # For measurement we use the (built-in) full softmax.
    full_ce = C.cross_entropy_with_softmax(z, label_sequence)

    # print out some useful training information
    log_number_of_parameters(z)
    print()

    # Run the training loop
    num_trained_samples = 0
    num_trained_samples_since_last_report = 0

    # Instantiate the trainer object to drive the model training
    lr_schedule = learning_rate_schedule(learning_rate, UnitType.sample)
    momentum_schedule = momentum_as_time_constant_schedule(
        momentum_as_time_constant)
    gradient_clipping_with_truncation = True
    learner = momentum_sgd(
        z.parameters,
        lr_schedule,
        momentum_schedule,
        gradient_clipping_threshold_per_sample=clipping_threshold_per_sample,
        gradient_clipping_with_truncation=gradient_clipping_with_truncation)
    trainer = Trainer(z, (cross_entropy, error), learner)

    for epoch_count in range(num_epochs):
        for features, labels, token_count in data.minibatch_generator(
                train_file_path, sequence_length, sequences_per_batch):
            arguments = ({input_sequence: features, label_sequence: labels})

            t_start = timeit.default_timer()
            trainer.train_minibatch(arguments)
            t_end = timeit.default_timer()

            samples_per_second = token_count / (t_end - t_start)

            # Print progress report every num_samples_between_progress_report samples
            if num_trained_samples_since_last_report >= num_samples_between_progress_report or num_trained_samples == 0:
                av_ce = average_cross_entropy(full_ce, input_sequence,
                                              label_sequence, data)
                print_progress(samples_per_second, av_ce, num_trained_samples,
                               t_start)
                num_trained_samples_since_last_report = 0

            num_trained_samples += token_count
            num_trained_samples_since_last_report += token_count

        # after each epoch save the model
        model_filename = "models/lm_epoch%d.dnn" % epoch_count
        z.save_model(model_filename)
        print("Saved model to '%s'" % model_filename)
Example 8
def train(train_reader, valid_reader, vocab, i2w, s2smodel, max_epochs, epoch_size):

    # Note: We would like to set the signature of 's2smodel' (s2smodel.update_signature()), but that will cause
    # an error since the training criterion uses a reduced sequence axis for the labels.
    # This is because it removes the initial <s> symbol. Hence, we must leave the model
    # with unspecified input shapes and axes.

    # create the training wrapper for the s2smodel, as well as the criterion function
    model_train = create_model_train(s2smodel)
    criterion = create_criterion_function(model_train)

    # also wire in a greedy decoder so that we can properly log progress on a validation example
    # This is not used for the actual training process.
    model_greedy = create_model_greedy(s2smodel)

    # This does not need to be done in training generally though
    # Instantiate the trainer object to drive the model training
    minibatch_size = 72
    lr = 0.001 if use_attention else 0.005   # TODO: can we use the same value for both?
    learner = adam_sgd(model_train.parameters,
                       lr       = learning_rate_schedule([lr]*2+[lr/2]*3+[lr/4], UnitType.sample, epoch_size),
                       momentum = momentum_as_time_constant_schedule(1100),
                       gradient_clipping_threshold_per_sample=2.3,
                       gradient_clipping_with_truncation=True)
    trainer = Trainer(None, criterion, learner)

    # Get minibatches of sequences to train with and perform model training
    total_samples = 0
    mbs = 0
    eval_freq = 100

    # print out some useful training information
    log_number_of_parameters(model_train) ; print()
    progress_printer = ProgressPrinter(freq=30, tag='Training')
    #progress_printer = ProgressPrinter(freq=30, tag='Training', log_to_file=model_path_stem + ".log") # use this to log to file

    sparse_to_dense = create_sparse_to_dense(input_vocab_dim)

    for epoch in range(max_epochs):
        print("Saving model to '%s'" % model_path(epoch))
        s2smodel.save(model_path(epoch))

        while total_samples < (epoch+1) * epoch_size:
            # get next minibatch of training data
            mb_train = train_reader.next_minibatch(minibatch_size)
            #trainer.train_minibatch(mb_train[train_reader.streams.features], mb_train[train_reader.streams.labels])
            trainer.train_minibatch({criterion.arguments[0]: mb_train[train_reader.streams.features], criterion.arguments[1]: mb_train[train_reader.streams.labels]})

            progress_printer.update_with_trainer(trainer, with_metric=True) # log progress

            # every N MBs evaluate on a test sequence to visually show how we're doing
            if mbs % eval_freq == 0:
                mb_valid = valid_reader.next_minibatch(1)

                # run an eval on the decoder output model (i.e. don't use the groundtruth)
                e = model_greedy(mb_valid[valid_reader.streams.features])
                print(format_sequences(sparse_to_dense(mb_valid[valid_reader.streams.features]), i2w))
                print("->")
                print(format_sequences(e, i2w))

                # debugging attention
                if use_attention:
                    debug_attention(model_greedy, mb_valid[valid_reader.streams.features])

            total_samples += mb_train[train_reader.streams.labels].num_samples
            mbs += 1

        # log a summary of the stats for the epoch
        progress_printer.epoch_summary(with_metric=True)

    # done: save the final model
    print("Saving final model to '%s'" % model_path(max_epochs))
    s2smodel.save(model_path(max_epochs))
    print("%d epochs complete." % max_epochs)
Example 9
def train(train_reader, valid_reader, vocab, i2w, s2smodel, max_epochs,
          epoch_size):

    # Note: We would like to set the signature of 's2smodel' (s2smodel.update_signature()), but that will cause
    # an error since the training criterion uses a reduced sequence axis for the labels.
    # This is because it removes the initial <s> symbol. Hence, we must leave the model
    # with unspecified input shapes and axes.

    # create the training wrapper for the s2smodel, as well as the criterion function
    model_train = create_model_train(s2smodel)
    criterion = create_criterion_function(model_train)

    # also wire in a greedy decoder so that we can properly log progress on a validation example
    # This is not used for the actual training process.
    model_greedy = create_model_greedy(s2smodel)

    # This does not need to be done in training generally though
    # Instantiate the trainer object to drive the model training
    minibatch_size = 72
    lr = 0.001 if use_attention else 0.005  # TODO: can we use the same value for both?
    learner = adam_sgd(
        model_train.parameters,
        lr=learning_rate_schedule([lr] * 2 + [lr / 2] * 3 + [lr / 4],
                                  UnitType.sample, epoch_size),
        momentum=momentum_as_time_constant_schedule(1100),
        gradient_clipping_threshold_per_sample=2.3,
        gradient_clipping_with_truncation=True)
    trainer = Trainer(None, criterion, learner)

    # Get minibatches of sequences to train with and perform model training
    total_samples = 0
    mbs = 0
    eval_freq = 100

    # print out some useful training information
    log_number_of_parameters(model_train)
    print()
    progress_printer = ProgressPrinter(freq=30, tag='Training')
    #progress_printer = ProgressPrinter(freq=30, tag='Training', log_to_file=model_path_stem + ".log") # use this to log to file

    sparse_to_dense = create_sparse_to_dense(input_vocab_dim)

    for epoch in range(max_epochs):
        print("Saving model to '%s'" % model_path(epoch))
        s2smodel.save(model_path(epoch))

        while total_samples < (epoch + 1) * epoch_size:
            # get next minibatch of training data
            mb_train = train_reader.next_minibatch(minibatch_size)
            #trainer.train_minibatch(mb_train[train_reader.streams.features], mb_train[train_reader.streams.labels])
            trainer.train_minibatch({
                criterion.arguments[0]: mb_train[train_reader.streams.features],
                criterion.arguments[1]: mb_train[train_reader.streams.labels]
            })

            progress_printer.update_with_trainer(
                trainer, with_metric=True)  # log progress

            # every N MBs evaluate on a test sequence to visually show how we're doing
            if mbs % eval_freq == 0:
                mb_valid = valid_reader.next_minibatch(1)

                # run an eval on the decoder output model (i.e. don't use the groundtruth)
                e = model_greedy(mb_valid[valid_reader.streams.features])
                print(format_sequences(
                    sparse_to_dense(mb_valid[valid_reader.streams.features]), i2w))
                print("->")
                print(format_sequences(e, i2w))

                # debugging attention
                if use_attention:
                    debug_attention(model_greedy,
                                    mb_valid[valid_reader.streams.features])

            total_samples += mb_train[train_reader.streams.labels].num_samples
            mbs += 1

        # log a summary of the stats for the epoch
        progress_printer.epoch_summary(with_metric=True)

    # done: save the final model
    print("Saving final model to '%s'" % model_path(max_epochs))
    s2smodel.save(model_path(max_epochs))
    print("%d epochs complete." % max_epochs)