Code example #1
def create_trainer(network, minibatch_size, epoch_size, num_quantization_bits, block_size, warm_up, progress_printer):
    if network['name'] == 'resnet20':
        lr_per_mb = [1.0]*80 + [0.1]*40 + [0.01]
    elif network['name'] == 'resnet110':
        lr_per_mb = [0.1]*1 + [1.0]*80 + [0.1]*40 + [0.01]
    else:
        raise RuntimeError("Unknown model name!")

    l2_reg_weight = 0.0001

    # Set learning parameters
    minibatch_size = 128
    lr_per_sample = [lr/minibatch_size for lr in lr_per_mb]
    lr_schedule = learning_parameter_schedule(lr_per_mb, minibatch_size = minibatch_size, epoch_size=epoch_size)
    mm_schedule = momentum_schedule(0.9, minibatch_size = minibatch_size)
    # learner object
    if block_size is not None and num_quantization_bits != 32:
        raise RuntimeError("Block momentum cannot be used with quantization, please remove quantized_bits option.")

    local_learner = momentum_sgd(network['output'].parameters, lr_schedule, mm_schedule,
                                 l2_regularization_weight=l2_reg_weight)

    if block_size is not None:
        learner = block_momentum_distributed_learner(local_learner, block_size=block_size)
    else:
        learner = data_parallel_distributed_learner(local_learner, num_quantization_bits=num_quantization_bits, distributed_after=warm_up)

    return Trainer(network['output'], (network['ce'], network['pe']), learner, progress_printer)
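
A minimal usage sketch for the helper above. The values are hypothetical, and create_resnet20_network is an assumed helper (not shown anywhere in these examples) that returns a dict with the keys the code expects: 'name', 'output', 'ce', 'pe'.

network = create_resnet20_network()   # hypothetical helper returning the network dict
progress_printer = ProgressPrinter(tag='Training', num_epochs=160)
trainer = create_trainer(network, minibatch_size=128, epoch_size=50000,
                         num_quantization_bits=32, block_size=None,
                         warm_up=0, progress_printer=progress_printer)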
Code example #2
def train_model(reader, model, criterion, epoch_size=50000, max_epochs=80):
    minibatch_size = 64

    # learning parameters
    learner = momentum_sgd(model.parameters, 
                           lr       = learning_parameter_schedule([0.0015625]*20+[0.00046875]*20+[0.00015625]*20+[0.000046875]*10+[0.000015625], minibatch_size=1, epoch_size=epoch_size),
                           momentum = momentum_as_time_constant_schedule([0]*20+[600]*20+[1200], epoch_size=epoch_size),
                           l2_regularization_weight = 0.002)
    
    # trainer object
    trainer = Trainer(None, criterion, learner)

    # perform model training
    log_number_of_parameters(model) ; print()
    progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)

    for epoch in range(max_epochs):       # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            mb = reader.next_minibatch(min(minibatch_size, epoch_size - sample_count)) # fetch minibatch.
            #trainer.train_minibatch(mb[reader.streams.features], mb[reader.streams.labels])
            trainer.train_minibatch({criterion.arguments[0]: mb[reader.streams.features], criterion.arguments[1]: mb[reader.streams.labels]})
            sample_count += mb[reader.streams.labels].num_samples                     # count samples processed so far
            progress_printer.update_with_trainer(trainer, with_metric=True) # log progress

        loss, metric, actual_samples = progress_printer.epoch_summary(with_metric=True)
        model.save(os.path.join(model_path, "ConvNet_CIFAR10_DataAug_{}.dnn".format(epoch)))

    # return evaluation error.
    return loss, metric # return values from last epoch
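
The reader argument above is assumed to be a MinibatchSource exposing streams.features and streams.labels. A possible construction for CIFAR-10 follows; the paths, shapes, and transforms are assumptions for illustration, not part of the original example.

import cntk.io.transforms as xforms
from cntk.io import MinibatchSource, ImageDeserializer, StreamDefs, StreamDef

def create_reader(map_file, mean_file, is_training):
    # Random cropping only during training; always scale and mean-subtract.
    transforms = []
    if is_training:
        transforms += [xforms.crop(crop_type='randomside', side_ratio=0.8)]
    transforms += [xforms.scale(width=32, height=32, channels=3, interpolations='linear'),
                   xforms.mean(mean_file)]
    return MinibatchSource(ImageDeserializer(map_file, StreamDefs(
        features=StreamDef(field='image', transforms=transforms),   # image pixels
        labels=StreamDef(field='label', shape=10))))                 # one-hot labels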
Code example #3
def create_trainer(network, epoch_size, num_epochs, minibatch_size, num_quantization_bits, progress_printer):
    
    # CNTK weights new gradient by (1-momentum) for unit gain, 
    # thus we divide Caffe's learning rate by (1-momentum)
    initial_learning_rate = 2.0 # equal to 0.2 in caffe
    initial_learning_rate *= minibatch_size / 128
    learn_rate_adjust_interval = 2
    learn_rate_decrease_factor = 0.94

    # Set learning parameters
    lr_per_mb = []
    learning_rate = initial_learning_rate
    for i in range(0, num_epochs, learn_rate_adjust_interval):
        lr_per_mb.extend([learning_rate] * learn_rate_adjust_interval)
        learning_rate *= learn_rate_decrease_factor

    lr_schedule       = learning_parameter_schedule(lr_per_mb, epoch_size=epoch_size)
    mm_schedule       = momentum_schedule(0.9)
    l2_reg_weight     = 0.0001 # CNTK L2 regularization is per sample, thus same as Caffe
    
    # Create learner
    local_learner = momentum_sgd(network['output'].parameters, lr_schedule, mm_schedule,
                                 l2_regularization_weight=l2_reg_weight)
    parameter_learner = data_parallel_distributed_learner(
        local_learner, 
        num_quantization_bits=num_quantization_bits,
        distributed_after=0)

    # Create trainer
    return Trainer(network['output'], (network['ce'], network['pe']), parameter_learner, progress_printer)
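
A worked illustration of the decay loop above in plain Python, assuming a hypothetical num_epochs of 6: every learn_rate_adjust_interval epochs the rate is multiplied by learn_rate_decrease_factor.

initial_learning_rate = 2.0
learn_rate_adjust_interval = 2
learn_rate_decrease_factor = 0.94

lr_per_mb, learning_rate = [], initial_learning_rate
for i in range(0, 6, learn_rate_adjust_interval):          # num_epochs = 6
    lr_per_mb.extend([learning_rate] * learn_rate_adjust_interval)
    learning_rate *= learn_rate_decrease_factor
print(lr_per_mb)   # approximately [2.0, 2.0, 1.88, 1.88, 1.7672, 1.7672]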
Code example #4
def train_model(reader, model, criterion, epoch_size=50000, max_epochs=80):
    minibatch_size = 64

    # learning parameters
    learner = momentum_sgd(model.parameters,
                           lr=learning_parameter_schedule(
                               [0.0015625] * 20 + [0.00046875] * 20 + [0.00015625] * 20 + [0.000046875] * 10 + [
                                   0.000015625], minibatch_size=1, epoch_size=epoch_size),
                           momentum=momentum_as_time_constant_schedule([0] * 20 + [600] * 20 + [1200],
                                                                       epoch_size=epoch_size),
                           l2_regularization_weight=0.002)

    # trainer object
    trainer = Trainer(None, criterion, learner)

    # perform model training
    log_number_of_parameters(model)
    print()
    progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)

    for epoch in range(max_epochs):  # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            mb = reader.next_minibatch(min(minibatch_size, epoch_size - sample_count))  # fetch minibatch.
            # trainer.train_minibatch(mb[reader.streams.features], mb[reader.streams.labels])
            trainer.train_minibatch({criterion.arguments[0]: mb[reader.streams.features],
                                     criterion.arguments[1]: mb[reader.streams.labels]})
            sample_count += mb[reader.streams.labels].num_samples  # count samples processed so far
            progress_printer.update_with_trainer(trainer, with_metric=True)  # log progress

        loss, metric, actual_samples = progress_printer.epoch_summary(with_metric=True)
        model.save(os.path.join(model_path, "ConvNet_CIFAR10_DataAug_{}.dnn".format(epoch)))

    # return evaluation error.
    return loss, metric  # return values from last epoch
Code example #5
def create_trainer(network, epoch_size, num_epochs, minibatch_size,
                   num_quantization_bits, progress_printer):

    # CNTK weights new gradient by (1-momentum) for unit gain,
    # thus we divide Caffe's learning rate by (1-momentum)
    initial_learning_rate = 2.0  # equal to 0.2 in caffe
    initial_learning_rate *= minibatch_size / 128
    learn_rate_adjust_interval = 2
    learn_rate_decrease_factor = 0.94

    # Set learning parameters
    lr_per_mb = []
    learning_rate = initial_learning_rate
    for i in range(0, num_epochs, learn_rate_adjust_interval):
        lr_per_mb.extend([learning_rate] * learn_rate_adjust_interval)
        learning_rate *= learn_rate_decrease_factor

    lr_schedule = learning_parameter_schedule(lr_per_mb, epoch_size=epoch_size)
    mm_schedule = momentum_schedule(0.9)
    l2_reg_weight = 0.0001  # CNTK L2 regularization is per sample, thus same as Caffe

    # Create learner
    local_learner = momentum_sgd(network['output'].parameters,
                                 lr_schedule,
                                 mm_schedule,
                                 l2_regularization_weight=l2_reg_weight)
    parameter_learner = data_parallel_distributed_learner(
        local_learner,
        num_quantization_bits=num_quantization_bits,
        distributed_after=0)

    # Create trainer
    return Trainer(network['output'], (network['ce'], network['pe']),
                   parameter_learner, progress_printer)
Code example #6
def Evaluator(criterion):
    loss, metric = Trainer._get_loss_metric(criterion)
    parameters = set(loss.parameters)
    if metric:
        parameters |= set(metric.parameters)
    dummy_learner = momentum_sgd(tuple(parameters),
                                 lr=learning_parameter_schedule(1),
                                 momentum=momentum_schedule(0))
    return Trainer(None, (loss, metric), dummy_learner)
Code example #7
File: Sequence2Sequence.py Project: AllanYiin/CNTK
def Evaluator(model, criterion):
    from cntk import Trainer
    from cntk.learners import momentum_sgd, momentum_schedule_per_sample
    loss, metric = Trainer._get_loss_metric(criterion)
    parameters = set(loss.parameters)
    if model:
        parameters |= set(model.parameters)
    if metric:
        parameters |= set(metric.parameters)
    dummy_learner = momentum_sgd(tuple(parameters),
                                 lr = learning_parameter_schedule(1),
                                 momentum = momentum_schedule_per_sample(0))
    return Trainer(model, (loss, metric), dummy_learner)
Code example #8
def Evaluator(model, criterion):
    from cntk import Trainer
    from cntk.learners import momentum_sgd, momentum_schedule_per_sample
    loss, metric = Trainer._get_loss_metric(criterion)
    parameters = set(loss.parameters)
    if model:
        parameters |= set(model.parameters)
    if metric:
        parameters |= set(metric.parameters)
    dummy_learner = momentum_sgd(tuple(parameters),
                                 lr=learning_parameter_schedule(1),
                                 momentum=momentum_schedule_per_sample(0))
    return Trainer(model, (loss, metric), dummy_learner)
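
A hypothetical usage sketch of the Evaluator helper above: the dummy momentum_sgd learner never updates any weights, it only satisfies the Trainer constructor, and evaluation is driven through test_minibatch. The model, criterion, features, and labels objects are assumed to exist elsewhere.

evaluator = Evaluator(model, criterion)
metric = evaluator.test_minibatch({criterion.arguments[0]: features,
                                   criterion.arguments[1]: labels})
print("evaluation metric:", metric)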
Code example #9
File: learner_test.py Project: yuxunhe/CNTK
def test_learner_update():
    i = C.input_variable(shape=(1,), needs_gradient=True, name='a')
    w_init = 1
    w = parameter(shape=(1,), init=w_init)
    res = i * w

    learner = sgd(res.parameters, lr=C.learning_parameter_schedule([0.1]*50 + [0.2]*50, minibatch_size = 1, epoch_size=1))
    assert learner.learning_rate() == 0.1
    x = learner.update({w: np.asarray([[2.]], dtype=np.float32)}, 100)
    assert learner.learning_rate() == 0.2
    assert w.value < w_init

    learner.reset_learning_rate(learning_parameter_schedule([0.3]*50 + [0.4]*50, minibatch_size = 1, epoch_size=1))
    assert learner.learning_rate() == 0.3
    x = learner.update({w: np.asarray([[2.]], dtype=np.float32)}, 100)
    assert learner.learning_rate() == 0.4
Code example #10
File: learner_test.py Project: junaidnaseer/CNTK
def test_learner_update():
    i = C.input_variable(shape=(1,), needs_gradient=True, name='a')
    w_init = 1
    w = parameter(shape=(1,), init=w_init)
    res = i * w

    learner = sgd(res.parameters, lr=C.learning_parameter_schedule([0.1]*50 + [0.2]*50, minibatch_size = 1, epoch_size=1))
    assert learner.learning_rate() == 0.1
    x = learner.update({w: np.asarray([[2.]], dtype=np.float32)}, 100)
    assert learner.learning_rate() == 0.2
    assert w.value < w_init

    learner.reset_learning_rate(learning_parameter_schedule([0.3]*50 + [0.4]*50, minibatch_size = 1, epoch_size=1))
    assert learner.learning_rate() == 0.3
    x = learner.update({w: np.asarray([[2.]], dtype=np.float32)}, 100)
    assert learner.learning_rate() == 0.4
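
A short sketch (assuming cntk is importable as C, as in the tests above) of how the schedule used in this test is indexed by sample count: with minibatch_size=1 and epoch_size=1, each list entry covers exactly one sample.

s = C.learning_parameter_schedule([0.1]*50 + [0.2]*50, minibatch_size=1, epoch_size=1)
assert s[0] == 0.1    # samples 0..49 use 0.1
assert s[49] == 0.1
assert s[50] == 0.2   # samples 50..99 use 0.2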
Code example #11
File: CNTK_102_FeedForward.py Project: gchoi/CNTK
def ffnet():
    input_dim = 2
    num_output_classes = 2
    num_hidden_layers = 2
    hidden_layers_dim = 50

    # Input variables denoting the features and label data
    feature = input_variable((input_dim), np.float32)
    label = input_variable((num_output_classes), np.float32)

    netout = Sequential([
        For(range(num_hidden_layers),
            lambda i: Dense(hidden_layers_dim, activation=sigmoid)),
        Dense(num_output_classes)
    ])(feature)

    ce = cross_entropy_with_softmax(netout, label)
    pe = classification_error(netout, label)

    lr_per_minibatch = learning_parameter_schedule(0.5)
    # Instantiate the trainer object to drive the model training
    learner = sgd(netout.parameters, lr=lr_per_minibatch)
    progress_printer = ProgressPrinter(128)
    trainer = Trainer(netout, (ce, pe), learner, progress_printer)

    # Get minibatches of training data and perform model training
    minibatch_size = 25

    for i in range(1024):
        features, labels = generate_random_data(minibatch_size, input_dim,
                                                num_output_classes)
        # Specify the mapping of input variables in the model to actual
        # minibatch data to be trained with
        trainer.train_minibatch({feature: features, label: labels})

    trainer.summarize_training_progress()
    test_features, test_labels = generate_random_data(minibatch_size,
                                                      input_dim,
                                                      num_output_classes)
    avg_error = trainer.test_minibatch({
        feature: test_features,
        label: test_labels
    })
    return avg_error
Code example #12
def create_trainer(network, minibatch_size, epoch_size, num_quantization_bits,
                   block_size, warm_up, progress_printer):
    if network['name'] == 'resnet20':
        lr_per_mb = [1.0] * 80 + [0.1] * 40 + [0.01]
    elif network['name'] == 'resnet110':
        lr_per_mb = [0.1] * 1 + [1.0] * 80 + [0.1] * 40 + [0.01]
    else:
        raise RuntimeError("Unknown model name!")

    l2_reg_weight = 0.0001

    # Set learning parameters
    minibatch_size = 128
    lr_per_sample = [lr / minibatch_size for lr in lr_per_mb]
    lr_schedule = learning_parameter_schedule(lr_per_mb,
                                              minibatch_size=minibatch_size,
                                              epoch_size=epoch_size)
    mm_schedule = momentum_schedule(0.9, minibatch_size=minibatch_size)
    # learner object
    if block_size is not None and num_quantization_bits != 32:
        raise RuntimeError(
            "Block momentum cannot be used with quantization, please remove quantized_bits option."
        )

    local_learner = momentum_sgd(network['output'].parameters,
                                 lr_schedule,
                                 mm_schedule,
                                 l2_regularization_weight=l2_reg_weight)

    if block_size is not None:
        learner = block_momentum_distributed_learner(local_learner,
                                                     block_size=block_size)
    else:
        learner = data_parallel_distributed_learner(
            local_learner,
            num_quantization_bits=num_quantization_bits,
            distributed_after=warm_up)

    return Trainer(network['output'], (network['ce'], network['pe']), learner,
                   progress_printer)
Code example #13
File: FeedForwardNet.py Project: AllanYiin/CNTK
def ffnet():
    input_dim = 2
    num_output_classes = 2
    num_hidden_layers = 2
    hidden_layers_dim = 50

    # Input variables denoting the features and label data
    feature = input_variable((input_dim), np.float32)
    label = input_variable((num_output_classes), np.float32)

    netout = Sequential([For(range(num_hidden_layers), lambda i: Dense(hidden_layers_dim, activation=sigmoid)),
                         Dense(num_output_classes)])(feature)

    ce = cross_entropy_with_softmax(netout, label)
    pe = classification_error(netout, label)

    lr_per_minibatch = learning_parameter_schedule(0.5)
    # Instantiate the trainer object to drive the model training
    learner = sgd(netout.parameters, lr=lr_per_minibatch)
    progress_printer = ProgressPrinter(128)
    trainer = Trainer(netout, (ce, pe), learner, progress_printer)

    # Get minibatches of training data and perform model training
    minibatch_size = 25

    for i in range(1024):
        features, labels = generate_random_data(
            minibatch_size, input_dim, num_output_classes)
        # Specify the mapping of input variables in the model to actual
        # minibatch data to be trained with
        trainer.train_minibatch({feature: features, label: labels})

    trainer.summarize_training_progress()
    test_features, test_labels = generate_random_data(
        minibatch_size, input_dim, num_output_classes)
    avg_error = trainer.test_minibatch(
        {feature: test_features, label: test_labels})
    return avg_error
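
The ffnet examples above rely on a generate_random_data helper that is not shown. A possible implementation is sketched below; it is an assumption modeled on the standard CNTK feed-forward sample, not necessarily the original helper.

import numpy as np

def generate_random_data(sample_size, feature_dim, num_classes):
    # Draw a class label per sample, then shift Gaussian features by the label
    # so the classes are separable enough to learn.
    Y = np.random.randint(size=(sample_size, 1), low=0, high=num_classes)
    X = (np.random.randn(sample_size, feature_dim) + 3) * (Y + 1)
    X = X.astype(np.float32)
    # One-hot encode the labels.
    class_ind = [Y == class_number for class_number in range(num_classes)]
    Y = np.asarray(np.hstack(class_ind), dtype=np.float32)
    return X, Y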
Code example #14
def train_model(base_model_file, feature_node_name, last_hidden_node_name,
                image_width, image_height, num_channels, num_classes, train_map_file,
                num_epochs, max_images=-1, freeze=False):
    epoch_size = sum(1 for line in open(train_map_file))
    if max_images > 0:
        epoch_size = min(epoch_size, max_images)

    # Create the minibatch source and input variables
    minibatch_source = create_mb_source(train_map_file, image_width, image_height, num_channels, num_classes)
    image_input = C.input_variable((num_channels, image_height, image_width))
    label_input = C.input_variable(num_classes)

    # Define mapping from reader streams to network inputs
    input_map = {
        image_input: minibatch_source[features_stream_name],
        label_input: minibatch_source[label_stream_name]
    }

    # Instantiate the transfer learning model and loss function
    tl_model = create_model(base_model_file, feature_node_name, last_hidden_node_name, num_classes, image_input, freeze)
    ce = cross_entropy_with_softmax(tl_model, label_input)
    pe = classification_error(tl_model, label_input)

    # Instantiate the trainer object
    lr_schedule = learning_parameter_schedule(lr_per_mb)
    mm_schedule = momentum_schedule(momentum_per_mb)
    learner = momentum_sgd(tl_model.parameters, lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight)
    progress_printer = ProgressPrinter(tag='Training', log_to_file=log_file_name, num_epochs=num_epochs)
    #progress_printer = ProgressPrinter(tag='Training', log_to_file=log_file_name, num_epochs=num_epochs)
    trainer = Trainer(tl_model, (ce, pe), learner, progress_printer)

    # Get minibatches of images and perform model training
    print("Training transfer learning model for {0} epochs (epoch_size = {1}).".format(num_epochs, epoch_size))
    batch_index = 0
    plot_data = {'batchindex': list(), 'loss': list(), 'error': list()}
    log_number_of_parameters(tl_model)
    for epoch in range(num_epochs):       # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = minibatch_source.next_minibatch(min(mb_size, epoch_size-sample_count), input_map=input_map)
            trainer.train_minibatch(data)                                    # update model with it
            sample_count += trainer.previous_minibatch_sample_count          # count samples processed so far
            #if sample_count % (100 * mb_size) == 0:
            #    print ("Processed {0} samples".format(sample_count))
            # For visualization...
            #print("type of plot_data:", type(plot_data), type(plot_data['batchindex']), type(plot_data['loss']),type(plot_data['error']))
            plot_data['batchindex'].append(batch_index)
            plot_data['loss'].append(trainer.previous_minibatch_loss_average)
            plot_data['error'].append(trainer.previous_minibatch_evaluation_average)
            batch_index += 1

        trainer.summarize_training_progress()

    # Visualize training result:
    window_width = 32
    loss_cumsum = np.cumsum(np.insert(plot_data['loss'], 0, 0))
    error_cumsum = np.cumsum(np.insert(plot_data['error'], 0, 0))
    # Moving average.
    plot_data['batchindex'] = np.insert(plot_data['batchindex'], 0, 0)[window_width:]
    plot_data['avg_loss'] = (loss_cumsum[window_width:] - loss_cumsum[:-window_width]) / window_width
    plot_data['avg_error'] = (error_cumsum[window_width:] - error_cumsum[:-window_width]) / window_width
    plt.figure(1)
    #plt.subplot(211)
    plt.plot(plot_data["batchindex"], plot_data["avg_loss"], 'b--')
    plt.xlabel('Minibatch number')
    plt.ylabel('Loss')
    plt.title('Minibatch run vs. Training loss ')
    #plt.show()
    plt.savefig(output_figure_loss, bbox_inches='tight' )

    plt.figure(2)
    #plt.subplot(212)
    plt.plot(plot_data["batchindex"], plot_data["avg_error"], 'r--')
    plt.xlabel('Minibatch number')
    plt.ylabel('Label Prediction Error')
    plt.title('Minibatch run vs. Label Prediction Error ')
    #plt.show()
    plt.savefig(output_figure_error, bbox_inches='tight')

    return tl_model
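
The transfer-learning train_model above reads several module-level settings that are not shown (lr_per_mb, momentum_per_mb, l2_reg_weight, mb_size, stream names, log and figure paths). The values below are assumptions added for illustration only.

# Hypothetical module-level configuration assumed by train_model above.
features_stream_name = 'features'
label_stream_name    = 'labels'
mb_size              = 50
lr_per_mb            = [0.2] * 10 + [0.1]
momentum_per_mb      = 0.9
l2_reg_weight        = 0.0005
log_file_name        = 'transfer_learning.log'
output_figure_loss   = 'training_loss.png'
output_figure_error  = 'training_error.png'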
Code example #15
File: AutTrainer.py Project: databill86/autcar
    def train(self,
              path_to_folder: str,
              model_definition,
              epochs: int = 10,
              output_model_path: str = "driver_model.onnx"):

        path_to_folder = path_to_folder.rstrip('/')

        map_file_train = path_to_folder + "/train_map.txt"
        map_file_test = path_to_folder + "/test_map.txt"
        mean_file = path_to_folder + "/meanfile.xml"
        classes_set = set()
        num_train = 0
        num_test = 0
        num_channels = 3

        try:
            with open(map_file_train) as f:
                csv_reader = csv.reader(f, delimiter='\t')
                for row in csv_reader:
                    cmd = row[1]
                    classes_set.add(cmd)
                    num_train = num_train + 1
        except Exception as e:
            raise Exception(
                "No train_map.txt file found in path " + path_to_folder +
                ". Did you create a dataset using create_balanced_dataset()?")

        num_classes = len(classes_set)

        with open(map_file_test) as f:
            for num_test, l in enumerate(f):
                pass

        transforms = [
            xforms.scale(width=self.__image_width,
                         height=self.__image_height,
                         channels=num_channels,
                         interpolations='linear'),
            xforms.mean(mean_file),
        ]

        # ImageDeserializer loads images in the BGR format, not RGB
        reader_train = MinibatchSource(
            ImageDeserializer(
                map_file_train,
                StreamDefs(features=StreamDef(field='image',
                                              transforms=transforms),
                           labels=StreamDef(field='label',
                                            shape=num_classes))))

        reader_test = MinibatchSource(
            ImageDeserializer(
                map_file_test,
                StreamDefs(features=StreamDef(field='image',
                                              transforms=transforms),
                           labels=StreamDef(field='label',
                                            shape=num_classes))))

        input_var = input_variable(
            (num_channels, self.__image_height, self.__image_width))
        label_var = input_variable((num_classes))

        # Normalize the input
        feature_scale = 1.0 / 256.0
        input_var_norm = element_times(feature_scale, input_var)

        model = model_definition(input_var)

        ce = cross_entropy_with_softmax(model, label_var)
        pe = classification_error(model, label_var)

        epoch_size = num_train
        minibatch_size = 64

        lr_per_minibatch = learning_parameter_schedule([0.01] * 10 +
                                                       [0.003] * 10 + [0.001],
                                                       epoch_size=epoch_size)
        momentums = momentum_schedule(0.9, minibatch_size=minibatch_size)
        l2_reg_weight = 0.001

        learner = momentum_sgd(model.parameters,
                               lr=lr_per_minibatch,
                               momentum=momentums,
                               l2_regularization_weight=l2_reg_weight)
        progress_printer = ProgressPrinter(tag='Training', num_epochs=epochs)
        trainer = cntk.train.Trainer(model, (ce, pe), [learner],
                                     [progress_printer])

        input_map = {
            input_var: reader_train.streams.features,
            label_var: reader_train.streams.labels
        }

        batch_index = 0
        plot_data = {'batchindex': [], 'loss': [], 'error': []}
        for epoch in range(epochs):
            sample_count = 0
            while sample_count < epoch_size:
                data = reader_train.next_minibatch(min(
                    minibatch_size, epoch_size - sample_count),
                                                   input_map=input_map)

                trainer.train_minibatch(data)
                sample_count += data[label_var].num_samples

                plot_data['batchindex'].append(batch_index)
                plot_data['loss'].append(
                    trainer.previous_minibatch_loss_average)
                plot_data['error'].append(
                    trainer.previous_minibatch_evaluation_average)

                batch_index += 1
            trainer.summarize_training_progress()

        epoch_size = num_test
        minibatch_size = 16

        metric_numer = 0
        metric_denom = 0
        sample_count = 0
        minibatch_index = 0

        while sample_count < epoch_size:
            current_minibatch = min(minibatch_size, epoch_size - sample_count)

            data = reader_test.next_minibatch(current_minibatch,
                                              input_map=input_map)

            metric_numer += trainer.test_minibatch(data) * current_minibatch
            metric_denom += current_minibatch

            sample_count += data[label_var].num_samples
            minibatch_index += 1

        print("")
        print("Final Results: Minibatch[1-{}]: errs = {:0.1f}% * {}".format(
            minibatch_index + 1, (metric_numer * 100.0) / metric_denom,
            metric_denom))
        print("")

        model.save(output_model_path, format=ModelFormat.ONNX)
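
The model_definition argument above can be any callable that maps the image input variable to num_classes raw scores. A minimal hypothetical example follows; it is not the project's actual model, and the class count must match the dataset.

from cntk.layers import Sequential, Convolution2D, MaxPooling, Dense
from cntk.ops import relu

def example_model_definition(input_var, num_classes=3):
    # Two small conv blocks followed by a linear output layer; the softmax is
    # applied inside cross_entropy_with_softmax during training.
    model = Sequential([
        Convolution2D((3, 3), 16, activation=relu, pad=True),
        MaxPooling((2, 2), strides=(2, 2)),
        Convolution2D((3, 3), 32, activation=relu, pad=True),
        MaxPooling((2, 2), strides=(2, 2)),
        Dense(num_classes, activation=None)
    ])
    return model(input_var)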
Code example #16
    def __train_cntk(self, path_to_folder: str, model_definition, epochs: int,
                     output_model_path: str, classes, minibatch_size: int):
        import cntk
        from cntk.learners import learning_parameter_schedule
        from cntk.ops import input_variable
        from cntk.io import MinibatchSource, ImageDeserializer, StreamDefs, StreamDef, MinibatchData, UserDeserializer
        import cntk.io.transforms as xforms
        from cntk.layers import default_options, Dense, Sequential, Activation, Embedding, Convolution2D, MaxPooling, Stabilizer, Convolution, Dropout, BatchNormalization
        from cntk.ops.functions import CloneMethod
        from cntk.logging import ProgressPrinter
        from cntk.losses import cross_entropy_with_softmax
        from cntk import classification_error, softmax, relu, ModelFormat, element_times, momentum_schedule, momentum_sgd
        import pandas as pd

        path_to_folder = path_to_folder.rstrip('/')

        map_file_train = path_to_folder + "/train_map.txt"
        map_file_test = path_to_folder + "/test_map.txt"
        classes_set = set()
        num_train = 0
        num_test = 0
        num_channels = 3

        class TrackDataset(UserDeserializer):
            def __init__(self, map_file, streams, chunksize=100):
                super(TrackDataset, self).__init__()
                self._batch_size = chunksize
                self.dataframes = pd.read_csv(map_file,
                                              sep='\t',
                                              dtype=str,
                                              header=None,
                                              names=["features", "labels"])
                self._streams = [
                    cntk.io.StreamInformation(s['name'], i, 'dense',
                                              np.float32, s['shape'])
                    for i, s in enumerate(streams)
                ]

                self._num_chunks = int(
                    math.ceil(len(self.dataframes) / chunksize))

            def _scale_image(self, image, width=224, height=168):
                try:
                    return image.resize((width, height), Image.LINEAR)
                except:
                    raise Exception('scale_image error')

            def stream_infos(self):
                return self._streams

            def num_chunks(self):
                return self._num_chunks

            def get_chunk(self, chunk_id):
                images = []
                labels = []
                maximum = (chunk_id + 1) * self._batch_size
                if (maximum > len(self.dataframes)):
                    maximum = len(self.dataframes)
                for i in range(chunk_id * self._batch_size, maximum):
                    img_name = self.dataframes.iloc[i, 0]
                    image = Image.open(img_name)
                    cl = self.dataframes.iloc[i, 1:].values[0]
                    image = self._scale_image(image)
                    image = np.moveaxis((np.array(image).astype('float32')),
                                        -1, 0)
                    image -= np.mean(image, keepdims=True)
                    image /= (np.std(image, keepdims=True) + 1e-6)
                    images.append(image)
                    yv = np.zeros(num_classes)
                    yv[classes.index(cl)] = 1
                    labels.append(yv)

                result = {}
                features = np.array(images)
                lab = np.array(labels).astype('float32')
                result[self._streams[0].m_name] = features
                result[self._streams[1].m_name] = lab
                return result

        try:
            with open(map_file_train) as f:
                csv_reader = csv.reader(f, delimiter='\t')
                for row in csv_reader:
                    cmd = row[1]
                    classes_set.add(cmd)
                    num_train = num_train + 1
        except Exception as e:
            raise Exception(
                "No train_map.txt file found in path " + path_to_folder +
                ". Did you create a dataset using create_balanced_dataset()?")

        num_classes = len(classes)

        with open(map_file_test) as f:
            for num_test, l in enumerate(f):
                pass

        # transforms = [
        #     xforms.scale(width=self.__image_width, height=self.__image_height, channels=num_channels, interpolations='linear'),
        #     xforms.mean(mean_file)
        # ]

        dataset_train = TrackDataset(map_file=map_file_train,
                                     streams=[
                                         dict(name='features',
                                              shape=(num_channels,
                                                     self.__image_height,
                                                     self.__image_width)),
                                         dict(name='labels',
                                              shape=(num_classes, ))
                                     ])
        reader_train = MinibatchSource([dataset_train], randomize=True)

        # a = dataset_train.num_chunks()

        dataset_test = TrackDataset(map_file=map_file_test,
                                    streams=[
                                        dict(name='features',
                                             shape=(num_channels,
                                                    self.__image_height,
                                                    self.__image_width)),
                                        dict(name='labels',
                                             shape=(num_classes, ))
                                    ])
        reader_test = MinibatchSource([dataset_test], randomize=True)

        # ImageDeserializer loads images in the BGR format, not RGB
        # reader_train = MinibatchSource(ImageDeserializer(map_file_train, StreamDefs(
        #     features = StreamDef(field='image', transforms=transforms),
        #     labels   = StreamDef(field='label', shape=num_classes)
        # )))

        # reader_test = MinibatchSource(ImageDeserializer(map_file_test, StreamDefs(
        #     features = StreamDef(field='image', transforms=transforms),
        #     labels   = StreamDef(field='label', shape=num_classes)
        # )))

        # mb = reader_train.next_minibatch(10)

        input_var = input_variable(
            (num_channels, self.__image_height, self.__image_width))
        label_var = input_variable((num_classes))

        model = model_definition(input_var)

        ce = cross_entropy_with_softmax(model, label_var)
        pe = classification_error(model, label_var)

        epoch_size = num_train

        lr_per_minibatch = learning_parameter_schedule([0.01] * 10 +
                                                       [0.003] * 10 + [0.001],
                                                       epoch_size=epoch_size)
        momentums = momentum_schedule(0.9, minibatch_size=minibatch_size)
        l2_reg_weight = 0.001

        learner = momentum_sgd(model.parameters,
                               lr=lr_per_minibatch,
                               momentum=momentums,
                               l2_regularization_weight=l2_reg_weight)
        progress_printer = ProgressPrinter(tag='Training', num_epochs=epochs)
        trainer = cntk.train.Trainer(model, (ce, pe), [learner],
                                     [progress_printer])

        input_map = {
            input_var: reader_train.streams.features,
            label_var: reader_train.streams.labels
        }

        print("Training started")
        batch_index = 0
        plot_data = {'batchindex': [], 'loss': [], 'error': []}
        for epoch in range(epochs):
            sample_count = 0
            while sample_count < epoch_size:
                data: MinibatchSource = reader_train.next_minibatch(
                    min(minibatch_size, epoch_size - sample_count),
                    input_map=input_map)

                trainer.train_minibatch(data)
                sample_count += data[label_var].num_samples

                batch_index += 1
                plot_data['batchindex'].append(batch_index)
                plot_data['loss'].append(
                    trainer.previous_minibatch_loss_average)
                plot_data['error'].append(
                    trainer.previous_minibatch_evaluation_average)

            trainer.summarize_training_progress()

        metric_numer = 0
        metric_denom = 0
        sample_count = 0
        minibatch_index = 0
        epoch_size = num_test

        while sample_count < epoch_size:
            current_minibatch = min(minibatch_size, epoch_size - sample_count)

            data = reader_test.next_minibatch(current_minibatch,
                                              input_map=input_map)

            metric_numer += trainer.test_minibatch(data) * current_minibatch
            metric_denom += current_minibatch

            sample_count += data[label_var].num_samples
            minibatch_index += 1

        print("")
        print("Final Results: Minibatch[1-{}]: errs = {:0.1f}% * {}".format(
            minibatch_index + 1, (metric_numer * 100.0) / metric_denom,
            metric_denom))
        print("")

        model.save(output_model_path, format=ModelFormat.ONNX)
Code example #17
def test_learning_parameter_schedule(params, expectation, minibatch_size):
    l = learning_parameter_schedule(*params)
    assert l.minibatch_size == minibatch_size
    assert [l[i] for i in range(len(expectation))] == expectation
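
A hypothetical parametrization for the schedule test above, assuming pytest's parametrize mechanism as used in learner_test.py; the concrete cases are illustrative, not the project's actual test matrix.

import pytest

LR_SCHEDULE_PARAMS = [
    ((0.2, 1), [0.2], 1),                            # scalar rate, reference minibatch size 1
    (([0.2, 0.4], 1, 5), [0.2] * 5 + [0.4] * 5, 1),  # two entries, 5 samples each (epoch_size=5)
]

@pytest.mark.parametrize("params, expectation, minibatch_size", LR_SCHEDULE_PARAMS)
def test_learning_parameter_schedule(params, expectation, minibatch_size):
    l = learning_parameter_schedule(*params)
    assert l.minibatch_size == minibatch_size
    assert [l[i] for i in range(len(expectation))] == expectation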
Code example #18
File: learner_test.py Project: junaidnaseer/CNTK
def test_learner_init_legacy():
    i = C.input_variable(shape=(1,), needs_gradient=True, name='a')
    w = parameter(shape=(1,))

    res = i * w

    # for backcompatibility test
    # this will be deprecated in future version
    learner = sgd(res.parameters, lr=learning_rate_schedule(0.1, UnitType.sample))
    assert learner._learning_rate_schedule.minibatch_size == 1  # the deprecated per sample schedule should not use compatible mode
    assert learner.learning_rate() == 0.1

    # for backcompatibility test
    # this will be deprecated in future version
    # The UnitType will provide per minibatch instruction for the learner
    # this will be deprecated in future version
    learner = sgd(res.parameters, lr=learning_rate_schedule(0.1, UnitType.minibatch))
    assert learner.is_compatible_mode() == False
    assert learner.learning_rate() == 0.1
    assert learner.minibatch_size == C.learners.IGNORE
    assert learner._learning_rate_schedule.minibatch_size == 0

    # for backcompatibility test, in reset learning rate, the learner won't receive the reference minibatch size from the schedule
    # user will need to specify the reference minibatch size explicitly
    # this will be deprecated in future version
    learner = sgd(res.parameters, lr=0.1)
    learner.reset_learning_rate(learning_rate_schedule([1, 2, 3], UnitType.minibatch))
    assert learner.learning_rate() == 1.0
    learner.minibatch_size = C.learners.IGNORE  # reset to be per minibatch
    assert learner.minibatch_size == C.learners.IGNORE
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.is_compatible_mode() == True

    # for backcompatibility test
    # this will be deprecated in future version
    learner = sgd(res.parameters, lr=learning_rate_schedule(0.1, UnitType.sample), minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.learning_rate() == 0.1
    assert learner.minibatch_size == C.learners.IGNORE  # the learner's reference minibatch size is still 0

    # this will be deprecated in future version: This is logical invalid combination but it was the only way to use mean gradient and set learning rate in the past.
    learner = sgd(res.parameters, lr=learning_rate_schedule(0.1, UnitType.sample), use_mean_gradient=True)
    assert learner.is_compatible_mode() == True
    assert learner.learning_rate() == 0.1
    #test the override in the new version
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.minibatch_size == C.learners.IGNORE  # the learner's reference minibatch size is still 0


    # for backcompatibility test
    # this will be deprecated in future version
    # The UnitType will provide per minibatch instruction for the learner
    # this will be deprecated in future version
    learner = sgd(res.parameters, lr=learning_rate_schedule(0.1, UnitType.minibatch), minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.learning_rate() == 0.1
    assert learner.minibatch_size == C.learners.IGNORE
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE

    # for backcompatibility test, in reset learning rate, the learner won't receive the reference minibatch size from the schedule
    # user will need to specify the reference minibatch size explicitly
    # this will be deprecated in future version
    learner = sgd(res.parameters, lr=0.1)
    learner.reset_learning_rate(learning_rate_schedule([1, 2, 3], UnitType.minibatch))
    assert learner.learning_rate() == 1.0
    learner.minibatch_size = C.learners.IGNORE  # reset to be per minibatch
    assert learner.minibatch_size == C.learners.IGNORE
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.is_compatible_mode() == True

    learner_parameter = learner.parameters
    from cntk.variables import Parameter
    param = learner_parameter[0]
    assert isinstance(param, Parameter)

    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    # back compatible API test
    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = learning_parameter_schedule(0.1, minibatch_size=1)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum_time_constant)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum_time_constant, unit_gain_value)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum_time_constant, unit_gain=unit_gain_value)

    C.set_default_unit_gain_value(False)
    unit_gain_value = C.default_unit_gain_value()
    assert not unit_gain_value

    C.set_default_unit_gain_value(True)
    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    lr_per_sample = learning_rate_schedule([(3, 0.1), (2, 0.2), (1, 0.3)], unit=UnitType.sample)
    C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum_time_constant)
    C.fsadagrad(res.parameters, lr_per_sample, momentum_time_constant, unit_gain_value)
    C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum_time_constant, unit_gain=unit_gain_value)

    gamma, inc, dec, max, min = [0.5, 1.2, 0.7, 10, 1e-8]
    lr_per_sample = learning_rate_schedule([0.1, 0.2], unit=UnitType.sample, epoch_size=100)
    C.rmsprop(res.parameters, lr_per_sample, gamma, inc, dec, max, min, True)

    C.adadelta(res.parameters, lr_per_sample, use_mean_gradient=True)
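
A short equivalence sketch drawn from the assertions in the legacy test above (an assumption, not an official mapping): the deprecated UnitType.sample schedule corresponds to minibatch_size=1 in the new API, while UnitType.minibatch carries a reference minibatch size of 0 (C.learners.IGNORE).

legacy_per_sample = learning_rate_schedule(0.1, UnitType.sample)        # deprecated API
modern_per_sample = learning_parameter_schedule(0.1, minibatch_size=1)  # replacement
assert legacy_per_sample.minibatch_size == modern_per_sample.minibatch_size == 1

legacy_per_mb = learning_rate_schedule(0.1, UnitType.minibatch)         # deprecated API
assert legacy_per_mb.minibatch_size == 0                                # 0 == C.learners.IGNORE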
Code example #19
File: learner_test.py Project: junaidnaseer/CNTK
def test_learner_init():
    i = C.input_variable(shape=(1,), needs_gradient=True, name='a')
    w = parameter(shape=(1,))

    res = i * w

    #test new API: learning_parameter_schedule

    #explicitly specify reference minibatch size and learning rate is in number:
    learner = sgd(res.parameters, lr=0.1, minibatch_size = 25)
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == 25 #the learner's reference minibatch
    #with direct learner learning rate number specification, the learning rate schedule get the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == 25
    assert learner.learning_rate() == 0.1

    #no explicitly specification of reference minibatch size and learning rate is in number:
    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1))
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == C.learners.IGNORE #the learner's reference minibatch
    #with direct learner learning rate number specification, the learning rate schedule get the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1


    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1, 20), minibatch_size = 25)
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == 25 #the learner's reference minibatch
    #with direct learner learning rate number specification, the learning rate schedule get the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == 20
    assert learner.learning_rate() == 0.1


    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1, 20))
    assert learner.is_compatible_mode() == False
    #with direct learner learning rate number specification, the learning rate schedule get the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == 20
    assert learner.learning_rate() == 0.1

    #no explicitly specification of reference minibatch size and learning rate is in number:
    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1))
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == C.learners.IGNORE #the learner's reference minibatch
    #with direct learner learning rate number specification, the learning rate schedule get the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1


    #no explicitly specification of reference minibatch size and learning rate is in number:
    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1), minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.minibatch_size == C.learners.IGNORE #the learner's reference minibatch
    #with direct learner learning rate number specification, the learning rate schedule get the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1


    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1, 20), minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.minibatch_size == C.learners.IGNORE #the learner's reference minibatch
    #with direct learner learning rate number specification, the learning rate schedule get the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == 20
    assert learner.learning_rate() == 0.1

    #no explicitly specification of reference minibatch size and learning rate is in number:
    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1), minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.minibatch_size == C.learners.IGNORE #the learner's reference minibatch
    #with direct learner learning rate number specification, the learning rate schedule get the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1

    mysgd = C.sgd(parameters=res.parameters, lr=0.4, minibatch_size=32)
    assert mysgd.minibatch_size == 32
    assert mysgd._learning_rate_schedule.minibatch_size == 32
    assert mysgd.learning_rate() == 0.4

    mymomentum = C.momentum_sgd(parameters=res.parameters, lr=0.4, momentum=0.9, minibatch_size=32)
    assert mymomentum.minibatch_size == 32
    assert mymomentum._learning_rate_schedule.minibatch_size == 32
    assert mymomentum.learning_rate() == 0.4

    myadadelta = C.adadelta(parameters=res.parameters, lr=0.4, minibatch_size=32)
    assert myadadelta.minibatch_size == 32
    assert myadadelta._learning_rate_schedule.minibatch_size == 32
    assert myadadelta.learning_rate() == 0.4

    myadam = C.adam(parameters=res.parameters, lr=0.4, momentum=0.9, variance_momentum=0.9, minibatch_size=32)
    assert myadam.minibatch_size == 32
    assert myadam._learning_rate_schedule.minibatch_size == 32
    assert myadam.learning_rate() == 0.4

    myadagrad = C.adagrad(parameters=res.parameters, lr=0.4, minibatch_size=32)
    assert myadagrad.minibatch_size == 32
    assert myadagrad._learning_rate_schedule.minibatch_size == 32
    assert myadagrad.learning_rate() == 0.4

    myfsadagrad = C.fsadagrad(parameters=res.parameters, lr=0.4, momentum=0.9, variance_momentum=0.9,
                              minibatch_size=32)
    assert myfsadagrad.minibatch_size == 32
    assert myfsadagrad._learning_rate_schedule.minibatch_size == 32
    assert myfsadagrad.learning_rate() == 0.4

    mynesterov = C.nesterov(parameters=res.parameters, lr=0.4, momentum=0.9, minibatch_size=32)
    assert mynesterov.minibatch_size == 32
    assert mynesterov._learning_rate_schedule.minibatch_size == 32
    assert mynesterov.learning_rate() == 0.4

    myrmsrop = C.rmsprop(parameters=res.parameters, lr=0.4, gamma=0.5, inc=1.2, dec=0.7, max=10, min=1e-8,
                         minibatch_size=32)
    assert myrmsrop.minibatch_size == 32
    assert myrmsrop._learning_rate_schedule.minibatch_size == 32
    assert myrmsrop.learning_rate() == 0.4

    mysgd = C.sgd(parameters=res.parameters, lr=[0.4, 0.1, 0.001], minibatch_size=32, epoch_size=512)
    assert mysgd.minibatch_size == 32
    assert mysgd._learning_rate_schedule.minibatch_size == 32
    assert mysgd._learning_rate_schedule[0] == 0.4
    assert mysgd._learning_rate_schedule[512] == 0.1
    assert mysgd._learning_rate_schedule[512 * 2] == 0.001

    mymomentum = C.momentum_sgd(parameters=res.parameters, lr=[0.4, 0.1, 0.001], momentum=[0.9],
                                minibatch_size=32, epoch_size=512)
    assert mymomentum.minibatch_size == 32
    assert mymomentum._learning_rate_schedule.minibatch_size == 32
    assert mymomentum._learning_rate_schedule[0] == 0.4
    assert mymomentum._learning_rate_schedule[512] == 0.1
    assert mymomentum._learning_rate_schedule[512 * 2] == 0.001


    myadadelta = C.adadelta(parameters=res.parameters, lr=[0.4, 0.1, 0.001],
                            minibatch_size=32, epoch_size=512)
    assert myadadelta.minibatch_size == 32
    assert myadadelta._learning_rate_schedule.minibatch_size == 32
    assert myadadelta._learning_rate_schedule[0] == 0.4
    assert myadadelta._learning_rate_schedule[512] == 0.1
    assert myadadelta._learning_rate_schedule[512 * 2] == 0.001

    myadam = C.adam(parameters=res.parameters, lr=[0.4, 0.1, 0.001], momentum=[0.9, 0.1, 0.001], variance_momentum=[0.9],
                    minibatch_size=32, epoch_size=512)
    assert myadam.minibatch_size == 32
    assert myadam._learning_rate_schedule.minibatch_size == 32
    assert myadam._learning_rate_schedule[0] == 0.4
    assert myadam._learning_rate_schedule[512] == 0.1
    assert myadam._learning_rate_schedule[512 * 2] == 0.001

    myadagrad = C.adagrad(parameters=res.parameters, lr=[0.4, 0.1, 0.001], minibatch_size=32, epoch_size=512)
    assert myadagrad.minibatch_size == 32
    assert myadagrad._learning_rate_schedule.minibatch_size == 32
    assert myadagrad._learning_rate_schedule[0] == 0.4
    assert myadagrad._learning_rate_schedule[512] == 0.1
    assert myadagrad._learning_rate_schedule[512 * 2] == 0.001

    myfsadagrad = C.fsadagrad(parameters=res.parameters, lr=[0.4, 0.1, 0.001], momentum=[0.9],
                              variance_momentum=[0.9],
                              minibatch_size=32, epoch_size=512)
    assert myfsadagrad.minibatch_size == 32
    assert myfsadagrad._learning_rate_schedule.minibatch_size == 32
    assert myfsadagrad._learning_rate_schedule[0] == 0.4
    assert myfsadagrad._learning_rate_schedule[512] == 0.1
    assert myfsadagrad._learning_rate_schedule[512 * 2] == 0.001

    mynesterov = C.nesterov(parameters=res.parameters, lr=[0.4, 0.1, 0.001], momentum=[0.9],
                            minibatch_size=32, epoch_size=512)
    assert mynesterov.minibatch_size == 32
    assert mynesterov._learning_rate_schedule.minibatch_size == 32
    assert mynesterov._learning_rate_schedule[0] == 0.4
    assert mynesterov._learning_rate_schedule[512] == 0.1
    assert mynesterov._learning_rate_schedule[512 * 2] == 0.001

    myrmsrop = C.rmsprop(parameters=res.parameters, lr=[0.4, 0.1, 0.001], gamma=0.5, inc=1.2, dec=0.7, max=10,
                         min=1e-8,
                         minibatch_size=32, epoch_size=512)
    assert myrmsrop.minibatch_size == 32
    assert myrmsrop._learning_rate_schedule.minibatch_size == 32
    assert myrmsrop._learning_rate_schedule[0] == 0.4
    assert myrmsrop._learning_rate_schedule[512] == 0.1
    assert myrmsrop._learning_rate_schedule[512 * 2] == 0.001

    learner_parameter = learner.parameters
    from cntk.variables import Parameter
    param = learner_parameter[0]
    assert isinstance(param, Parameter)

    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    momentum = C.momentum_schedule(0.999, minibatch_size=1)
    lr_per_sample = learning_parameter_schedule(0.1, minibatch_size = 1)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum, unit_gain_value)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum, unit_gain=unit_gain_value)

    C.set_default_unit_gain_value(False)
    unit_gain_value = C.default_unit_gain_value()
    assert not unit_gain_value

    lr_per_sample = learning_parameter_schedule([0.1, 0.2], minibatch_size = 1)
    C.nesterov(res.parameters, lr=lr_per_sample, momentum=momentum)
    C.nesterov(res.parameters, lr_per_sample, momentum, unit_gain_value)
    C.nesterov(res.parameters, lr=lr_per_sample, momentum=momentum, unit_gain=unit_gain_value)

    lr_per_sample = learning_parameter_schedule([0.1]*3 +[0.2]*2 +[0.3], minibatch_size=1)
    C.adagrad(res.parameters, lr=lr_per_sample, need_ave_multiplier=True)

    C.set_default_unit_gain_value(True)
    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    lr_per_sample = learning_parameter_schedule([(3,0.1), (2, 0.2), (1, 0.3)], minibatch_size=1)
    C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum)
    C.fsadagrad(res.parameters, lr_per_sample, momentum, unit_gain_value)
    C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum, unit_gain=unit_gain_value)

    gamma, inc, dec, max, min = [0.5, 1.2, 0.7, 10, 1e-8]
    lr_per_sample = learning_parameter_schedule([0.1, 0.2], minibatch_size = 1, epoch_size = 100)
    C.rmsprop(res.parameters, lr_per_sample, gamma, inc, dec, max, min, True)

    C.adadelta(res.parameters, lr_per_sample)
Code example #20
def test_learner_init():
    i = C.input_variable(shape=(1, ), needs_gradient=True, name='a')
    w = parameter(shape=(1, ))

    res = i * w

    #test new API: learning_parameter_schedule

    #explicitly specify reference minibatch size and learning rate is in number:
    learner = sgd(res.parameters, lr=0.1, minibatch_size=25)
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == 25  #the learner's reference minibatch
    #with direct learner learning rate number specification, the learning rate schedule get the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == 25
    assert learner.learning_rate() == 0.1

    #no explicitly specification of reference minibatch size and learning rate is in number:
    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1))
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == C.learners.IGNORE  #the learner's reference minibatch
    #with direct learner learning rate number specification, the learning rate schedule get the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1

    learner = sgd(res.parameters,
                  lr=learning_parameter_schedule(0.1, 20),
                  minibatch_size=25)
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == 25  #the learner's reference minibatch
    #with direct learner learning rate number specification, the learning rate schedule get the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == 20
    assert learner.learning_rate() == 0.1

    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1, 20))
    assert learner.is_compatible_mode() == False
    #with direct learner learning rate number specification, the learning rate schedule get the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == 20
    assert learner.learning_rate() == 0.1

    #no explicitly specification of reference minibatch size and learning rate is in number:
    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1))
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == C.learners.IGNORE  #the learner's reference minibatch
    #with direct learner learning rate number specification, the learning rate schedule get the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1

    #no explicitly specification of reference minibatch size and learning rate is in number:
    learner = sgd(res.parameters,
                  lr=learning_parameter_schedule(0.1),
                  minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.minibatch_size == C.learners.IGNORE  #the learner's reference minibatch
    #with direct learner learning rate number specification, the learning rate schedule get the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1

    learner = sgd(res.parameters,
                  lr=learning_parameter_schedule(0.1, 20),
                  minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.minibatch_size == C.learners.IGNORE  #the learner's reference minibatch size
    #the schedule keeps its own reference minibatch size (20) even though the learner uses IGNORE:
    assert learner._learning_rate_schedule.minibatch_size == 20
    assert learner.learning_rate() == 0.1

    mysgd = C.sgd(parameters=res.parameters, lr=0.4, minibatch_size=32)
    assert mysgd.minibatch_size == 32
    assert mysgd._learning_rate_schedule.minibatch_size == 32
    assert mysgd.learning_rate() == 0.4

    mymomentum = C.momentum_sgd(parameters=res.parameters,
                                lr=0.4,
                                momentum=0.9,
                                minibatch_size=32)
    assert mymomentum.minibatch_size == 32
    assert mymomentum._learning_rate_schedule.minibatch_size == 32
    assert mymomentum.learning_rate() == 0.4

    myadadelta = C.adadelta(parameters=res.parameters,
                            lr=0.4,
                            minibatch_size=32)
    assert myadadelta.minibatch_size == 32
    assert myadadelta._learning_rate_schedule.minibatch_size == 32
    assert myadadelta.learning_rate() == 0.4

    myadam = C.adam(parameters=res.parameters,
                    lr=0.4,
                    momentum=0.9,
                    variance_momentum=0.9,
                    minibatch_size=32)
    assert myadam.minibatch_size == 32
    assert myadam._learning_rate_schedule.minibatch_size == 32
    assert myadam.learning_rate() == 0.4

    myadagrad = C.adagrad(parameters=res.parameters, lr=0.4, minibatch_size=32)
    assert myadagrad.minibatch_size == 32
    assert myadagrad._learning_rate_schedule.minibatch_size == 32
    assert myadagrad.learning_rate() == 0.4

    myfsadagrad = C.fsadagrad(parameters=res.parameters,
                              lr=0.4,
                              momentum=0.9,
                              variance_momentum=0.9,
                              minibatch_size=32)
    assert myfsadagrad.minibatch_size == 32
    assert myfsadagrad._learning_rate_schedule.minibatch_size == 32
    assert myfsadagrad.learning_rate() == 0.4

    mynesterov = C.nesterov(parameters=res.parameters,
                            lr=0.4,
                            momentum=0.9,
                            minibatch_size=32)
    assert mynesterov.minibatch_size == 32
    assert mynesterov._learning_rate_schedule.minibatch_size == 32
    assert mynesterov.learning_rate() == 0.4

    myrmsprop = C.rmsprop(parameters=res.parameters,
                          lr=0.4,
                          gamma=0.5,
                          inc=1.2,
                          dec=0.7,
                          max=10,
                          min=1e-8,
                          minibatch_size=32)
    assert myrmsprop.minibatch_size == 32
    assert myrmsprop._learning_rate_schedule.minibatch_size == 32
    assert myrmsprop.learning_rate() == 0.4

    mysgd = C.sgd(parameters=res.parameters,
                  lr=[0.4, 0.1, 0.001],
                  minibatch_size=32,
                  epoch_size=512)
    assert mysgd.minibatch_size == 32
    assert mysgd._learning_rate_schedule.minibatch_size == 32
    assert mysgd._learning_rate_schedule[0] == 0.4
    assert mysgd._learning_rate_schedule[512] == 0.1
    assert mysgd._learning_rate_schedule[512 * 2] == 0.001

    mymomentum = C.momentum_sgd(parameters=res.parameters,
                                lr=[0.4, 0.1, 0.001],
                                momentum=[0.9],
                                minibatch_size=32,
                                epoch_size=512)
    assert mymomentum.minibatch_size == 32
    assert mymomentum._learning_rate_schedule.minibatch_size == 32
    assert mymomentum._learning_rate_schedule[0] == 0.4
    assert mymomentum._learning_rate_schedule[512] == 0.1
    assert mymomentum._learning_rate_schedule[512 * 2] == 0.001

    myadadelta = C.adadelta(parameters=res.parameters,
                            lr=[0.4, 0.1, 0.001],
                            minibatch_size=32,
                            epoch_size=512)
    assert myadadelta.minibatch_size == 32
    assert myadadelta._learning_rate_schedule.minibatch_size == 32
    assert myadadelta._learning_rate_schedule[0] == 0.4
    assert myadadelta._learning_rate_schedule[512] == 0.1
    assert myadadelta._learning_rate_schedule[512 * 2] == 0.001

    myadam = C.adam(parameters=res.parameters,
                    lr=[0.4, 0.1, 0.001],
                    momentum=[0.9, 0.1, 0.001],
                    variance_momentum=[0.9],
                    minibatch_size=32,
                    epoch_size=512)
    assert myadam.minibatch_size == 32
    assert myadam._learning_rate_schedule.minibatch_size == 32
    assert myadam._learning_rate_schedule[0] == 0.4
    assert myadam._learning_rate_schedule[512] == 0.1
    assert myadam._learning_rate_schedule[512 * 2] == 0.001

    myadagrad = C.adagrad(parameters=res.parameters,
                          lr=[0.4, 0.1, 0.001],
                          minibatch_size=32,
                          epoch_size=512)
    assert myadagrad.minibatch_size == 32
    assert myadagrad._learning_rate_schedule.minibatch_size == 32
    assert myadagrad._learning_rate_schedule[0] == 0.4
    assert myadagrad._learning_rate_schedule[512] == 0.1
    assert myadagrad._learning_rate_schedule[512 * 2] == 0.001

    myfsadagrad = C.fsadagrad(parameters=res.parameters,
                              lr=[0.4, 0.1, 0.001],
                              momentum=[0.9],
                              variance_momentum=[0.9],
                              minibatch_size=32,
                              epoch_size=512)
    assert myfsadagrad.minibatch_size == 32
    assert myfsadagrad._learning_rate_schedule.minibatch_size == 32
    assert myfsadagrad._learning_rate_schedule[0] == 0.4
    assert myfsadagrad._learning_rate_schedule[512] == 0.1
    assert myfsadagrad._learning_rate_schedule[512 * 2] == 0.001

    mynesterov = C.nesterov(parameters=res.parameters,
                            lr=[0.4, 0.1, 0.001],
                            momentum=[0.9],
                            minibatch_size=32,
                            epoch_size=512)
    assert mynesterov.minibatch_size == 32
    assert mynesterov._learning_rate_schedule.minibatch_size == 32
    assert mynesterov._learning_rate_schedule[0] == 0.4
    assert mynesterov._learning_rate_schedule[512] == 0.1
    assert mynesterov._learning_rate_schedule[512 * 2] == 0.001

    myrmsprop = C.rmsprop(parameters=res.parameters,
                          lr=[0.4, 0.1, 0.001],
                          gamma=0.5,
                          inc=1.2,
                          dec=0.7,
                          max=10,
                          min=1e-8,
                          minibatch_size=32,
                          epoch_size=512)
    assert myrmsprop.minibatch_size == 32
    assert myrmsprop._learning_rate_schedule.minibatch_size == 32
    assert myrmsprop._learning_rate_schedule[0] == 0.4
    assert myrmsprop._learning_rate_schedule[512] == 0.1
    assert myrmsprop._learning_rate_schedule[512 * 2] == 0.001

    learner_parameter = learner.parameters
    from cntk.variables import Parameter
    param = learner_parameter[0]
    assert isinstance(param, Parameter)

    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    momentum = C.momentum_schedule(0.999, minibatch_size=1)
    lr_per_sample = learning_parameter_schedule(0.1, minibatch_size=1)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum, unit_gain_value)
    C.momentum_sgd(res.parameters,
                   lr_per_sample,
                   momentum,
                   unit_gain=unit_gain_value)

    C.set_default_unit_gain_value(False)
    unit_gain_value = C.default_unit_gain_value()
    assert not unit_gain_value

    lr_per_sample = learning_parameter_schedule([0.1, 0.2], minibatch_size=1)
    C.nesterov(res.parameters, lr=lr_per_sample, momentum=momentum)
    C.nesterov(res.parameters, lr_per_sample, momentum, unit_gain_value)
    C.nesterov(res.parameters,
               lr=lr_per_sample,
               momentum=momentum,
               unit_gain=unit_gain_value)

    lr_per_sample = learning_parameter_schedule([0.1] * 3 + [0.2] * 2 + [0.3],
                                                minibatch_size=1)
    C.adagrad(res.parameters, lr=lr_per_sample, need_ave_multiplier=True)

    C.set_default_unit_gain_value(True)
    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    lr_per_sample = learning_parameter_schedule([(3, 0.1), (2, 0.2), (1, 0.3)],
                                                minibatch_size=1)
    C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum)
    C.fsadagrad(res.parameters, lr_per_sample, momentum, unit_gain_value)
    C.fsadagrad(res.parameters,
                lr=lr_per_sample,
                momentum=momentum,
                unit_gain=unit_gain_value)

    gamma, inc, dec, max, min = [0.5, 1.2, 0.7, 10, 1e-8]
    lr_per_sample = learning_parameter_schedule([0.1, 0.2],
                                                minibatch_size=1,
                                                epoch_size=100)
    C.rmsprop(res.parameters, lr_per_sample, gamma, inc, dec, max, min, True)

    C.adadelta(res.parameters, lr_per_sample)
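Taken together, the cases above reduce to a simple precedence rule: a schedule's own reference minibatch size always wins; when the learning rate is passed as a plain number, the schedule inherits the learner's reference minibatch size; and when neither side specifies one, both report C.learners.IGNORE. A minimal standalone sketch of that rule, using a hypothetical one-parameter model (names below are illustrative, not taken from the test above):

import cntk as C
from cntk.learners import sgd, learning_parameter_schedule

w = C.parameter(shape=(1,))      # hypothetical single-parameter model
z = C.input_variable(1) * w

# plain-number learning rate: the schedule inherits the learner's reference size (25)
l1 = sgd(z.parameters, lr=0.1, minibatch_size=25)
assert l1._learning_rate_schedule.minibatch_size == 25

# the schedule carries its own reference size (20), which wins over the learner's 25
l2 = sgd(z.parameters,
         lr=learning_parameter_schedule(0.1, minibatch_size=20),
         minibatch_size=25)
assert l2._learning_rate_schedule.minibatch_size == 20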
コード例 #21
0
def train_model(cntkModel, params, input_map):
    log = logging.getLogger("neuralnets1.utils.train_model")
    mb_size = params['mb_size']
    num_epochs = params['num_epochs']
    epoch_size_train = params['epoch_size_train']
    epoch_size_test = params['epoch_size_test']
    minibatch_source_train = params['train_mbs']
    minibatch_source_valid = params['valid_mbs']
    #minibatch_source_test = params['test_mbs'] ;

    # Instantiate the trainer object
    #lr_schedule = learning_rate_schedule(params['learn_rate'], unit=UnitType.minibatch)
    lr_per_minibatch = learning_parameter_schedule(params['learn_rate'],
                                                   minibatch_size=mb_size,
                                                   epoch_size=epoch_size_train)

    mm_schedule = momentum_schedule(params['beta_momentum_gd'])
    learner = momentum_sgd(cntkModel.parameters,
                           lr_per_minibatch,
                           mm_schedule,
                           l2_regularization_weight=params['l2_reg_weight'])
    progress_writers = [ProgressPrinter(tag='Training', num_epochs=num_epochs)]
    trainer = Trainer(cntkModel, (params['ce'], params['pe']), learner,
                      progress_writers)

    # Run training epochs
    log.info(
        'Training transfer learning model for %s epochs (epoch_size_train = %s ) .'
        % (num_epochs, epoch_size_train))
    #   print("Training transfer learning model for {0} epochs (epoch_size_train = {1}).".format(num_epochs, epoch_size_train))
    errsVal = []
    errsTrain = []
    log_number_of_parameters(cntkModel)

    for epoch in range(num_epochs):
        err_numer = 0
        sample_counts = 0
        while sample_counts < epoch_size_train:  # Loop over minibatches in the epoch
            sample_count = min(mb_size, epoch_size_train - sample_counts)
            data = minibatch_source_train.next_minibatch(sample_count,
                                                         input_map=input_map)
            trainer.train_minibatch(data)  # Update model with it
            sample_counts += sample_count  # Count samples processed so far
            err_numer += trainer.previous_minibatch_evaluation_average * sample_count

            if sample_counts % (100 * mb_size) == 0:
                log.info("Training: processed %s samples" % sample_counts)
        # Compute accuracy on training and test sets
        errsTrain.append(err_numer / float(sample_counts))
        trainer.summarize_training_progress()
        errsVal.append(
            cntkComputeTestError(trainer, minibatch_source_valid, mb_size,
                                 epoch_size_test, input_map))
        trainer.summarize_test_progress()

        # Plot training progress
        plt.plot(errsTrain, 'b-', errsVal, 'g-')
        plt.xlabel('Epoch number')
        plt.ylabel('Error')
        plt.title('Training error (blue), validation error (green)')
        plt.draw()
    return cntkModel
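The helper cntkComputeTestError used above is not shown in this excerpt; one plausible shape for it, averaging trainer.test_minibatch over one pass of the validation source, is sketched below (an assumption, not the original implementation):

def cntkComputeTestError(trainer, minibatch_source, mb_size, epoch_size, input_map):
    # Average the trainer's evaluation metric over one pass of the validation data.
    metric_numer = 0.0
    sample_count = 0
    while sample_count < epoch_size:
        current = min(mb_size, epoch_size - sample_count)
        data = minibatch_source.next_minibatch(current, input_map=input_map)
        metric_numer += trainer.test_minibatch(data) * current
        sample_count += current
    return metric_numer / float(sample_count)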
コード例 #22
0
def train_model(base_model_file,
                feature_node_name,
                last_hidden_node_name,
                image_width,
                image_height,
                num_channels,
                num_classes,
                train_map_file,
                num_epochs,
                max_images=-1,
                freeze=False):
    epoch_size = sum(1 for line in open(train_map_file))
    if max_images > 0:
        epoch_size = min(epoch_size, max_images)

    # Create the minibatch source and input variables
    minibatch_source = create_mb_source(train_map_file, image_width,
                                        image_height, num_channels,
                                        num_classes)
    image_input = C.input_variable((num_channels, image_height, image_width))
    label_input = C.input_variable(num_classes)

    # Define mapping from reader streams to network inputs
    input_map = {
        image_input: minibatch_source[features_stream_name],
        label_input: minibatch_source[label_stream_name]
    }

    # Instantiate the transfer learning model and loss function
    tl_model = create_model(base_model_file, feature_node_name,
                            last_hidden_node_name, num_classes, image_input,
                            freeze)
    ce = cross_entropy_with_softmax(tl_model, label_input)
    pe = classification_error(tl_model, label_input)

    # Instantiate the trainer object
    lr_schedule = learning_parameter_schedule(lr_per_mb)
    mm_schedule = momentum_schedule(momentum_per_mb)
    learner = momentum_sgd(tl_model.parameters,
                           lr_schedule,
                           mm_schedule,
                           l2_regularization_weight=l2_reg_weight)
    progress_printer = ProgressPrinter(tag='Training', num_epochs=num_epochs)
    trainer = Trainer(tl_model, (ce, pe), learner, progress_printer)

    # Get minibatches of images and perform model training
    print(
        "Training transfer learning model for {0} epochs (epoch_size = {1}).".
        format(num_epochs, epoch_size))
    log_number_of_parameters(tl_model)
    for epoch in range(num_epochs):  # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = minibatch_source.next_minibatch(min(
                mb_size, epoch_size - sample_count),
                                                   input_map=input_map)
            trainer.train_minibatch(data)  # update model with it
            sample_count += trainer.previous_minibatch_sample_count  # count samples processed so far
            if sample_count % (100 * mb_size) == 0:
                print("Processed {0} samples".format(sample_count))

        trainer.summarize_training_progress()

    return tl_model
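This function reads several module-level settings (mb_size, lr_per_mb, momentum_per_mb, l2_reg_weight, and the stream names) that are defined elsewhere in the script; representative values, shown only for orientation and not taken verbatim from the original, might look like:

# illustrative module-level configuration assumed by train_model above
features_stream_name = 'features'
label_stream_name = 'labels'
mb_size = 50
lr_per_mb = [0.2] * 10 + [0.1]
momentum_per_mb = 0.9
l2_reg_weight = 0.0005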
コード例 #23
0
x = C.input_variable(shape=(2, ), needs_gradient=False)
t = C.input_variable(shape=(3, ), needs_gradient=False)

init = C.initializer.normal(0.01)
with C.layers.default_options(init=init):
    z = C.layers.Sequential(
        [C.layers.Dense(12, activation=C.relu),
         C.layers.Dense(3)])

y = C.cross_entropy_with_softmax(z(x), t)
acc = C.classification_error(z(x), t)

batch_size = 20
from cntk.learners import sgd, learning_parameter_schedule
lr = learning_parameter_schedule([.5 * (.1**i) for i in range(10000)],
                                 minibatch_size=batch_size,
                                 epoch_size=1000 * batch_size)
learner = sgd(z.parameters, lr)
trainer = C.Trainer(z(x), (y, acc), [learner])

for i in range(min(dataset_size, 100000) // batch_size):
    sample = X[batch_size * i:batch_size * (i + 1)]
    target = labels[batch_size * i:batch_size * (i + 1)]
    trainer.train_minibatch({x: sample, t: target})
    loss = trainer.previous_minibatch_loss_average
    acc = trainer.previous_minibatch_evaluation_average
    print("cost {} - classification error {} - learning rate {}".format(
        loss, acc, learner.learning_rate()))

y = C.argmax(z(x))
accuracy = 0
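The snippet assumes X, labels, and dataset_size are prepared earlier; a minimal synthetic stand-in (purely illustrative) that makes the training loop runnable could be:

import numpy as np

dataset_size = 2000
X = np.random.randn(dataset_size, 2).astype(np.float32)    # 2-d features
class_ids = np.random.randint(0, 3, size=dataset_size)
labels = np.eye(3, dtype=np.float32)[class_ids]             # one-hot targets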
コード例 #24
0
File: DeepQNeuralNetwork.py  Project: AllanYiin/CNTK
    def __init__(self, input_shape, nb_actions,
                 gamma=0.99, explorer=LinearEpsilonAnnealingExplorer(1, 0.1, 1000000),
                 learning_rate=0.00025, momentum=0.95, minibatch_size=32,
                 memory_size=500000, train_after=200000, train_interval=4, target_update_interval=10000,
                 monitor=True):
        self.input_shape = input_shape
        self.nb_actions = nb_actions
        self.gamma = gamma

        self._train_after = train_after
        self._train_interval = train_interval
        self._target_update_interval = target_update_interval

        self._explorer = explorer
        self._minibatch_size = minibatch_size
        self._history = History(input_shape)
        self._memory = ReplayMemory(memory_size, input_shape[1:], 4)
        self._num_actions_taken = 0

        # Metrics accumulator
        self._episode_rewards, self._episode_q_means, self._episode_q_stddev = [], [], []

        # Action Value model (used by agent to interact with the environment)
        with default_options(activation=relu, init=he_uniform()):
            self._action_value_net = Sequential([
                Convolution2D((8, 8), 16, strides=4),
                Convolution2D((4, 4), 32, strides=2),
                Convolution2D((3, 3), 32, strides=1),
                Dense(256, init=he_uniform(scale=0.01)),
                Dense(nb_actions, activation=None, init=he_uniform(scale=0.01))
            ])
        self._action_value_net.update_signature(Tensor[input_shape])

        # Target model used to compute the target Q-values in training, updated
        # less frequently for increased stability.
        self._target_net = self._action_value_net.clone(CloneMethod.freeze)

        # Function computing Q-values targets as part of the computation graph
        @Function
        @Signature(post_states=Tensor[input_shape], rewards=Tensor[()], terminals=Tensor[()])
        def compute_q_targets(post_states, rewards, terminals):
            return element_select(
                terminals,
                rewards,
                gamma * reduce_max(self._target_net(post_states), axis=0) + rewards,
            )

        # Define the loss, using Huber Loss (more robust to outliers)
        @Function
        @Signature(pre_states=Tensor[input_shape], actions=Tensor[nb_actions],
                   post_states=Tensor[input_shape], rewards=Tensor[()], terminals=Tensor[()])
        def criterion(pre_states, actions, post_states, rewards, terminals):
            # Compute the q_targets
            q_targets = compute_q_targets(post_states, rewards, terminals)

            # actions is a 1-hot encoding of the action done by the agent
            q_acted = reduce_sum(self._action_value_net(pre_states) * actions, axis=0)

            # Define training criterion as the Huber Loss function
            return huber_loss(q_targets, q_acted, 1.0)

        # Adam based SGD
        lr_schedule = learning_parameter_schedule(learning_rate)
        m_schedule = momentum_schedule(momentum)
        vm_schedule = momentum_schedule(0.999)
        l_sgd = adam(self._action_value_net.parameters, lr_schedule,
                     momentum=m_schedule, variance_momentum=vm_schedule)

        self._metrics_writer = TensorBoardProgressWriter(freq=1, log_dir='metrics', model=criterion) if monitor else None
        self._learner = l_sgd
        self._trainer = Trainer(criterion, (criterion, None), l_sgd, self._metrics_writer)
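A single optimization step against this trainer could be issued roughly as follows. This is a sketch under two assumptions not shown in the excerpt: the replay memory yields numpy batches, and criterion.arguments preserves the signature order (pre_states, actions, post_states, rewards, terminals), as other examples in this collection also assume.

def train_step(trainer, criterion, pre_states, actions_one_hot, post_states, rewards, terminals):
    # Bind one replay-memory sample to the criterion's arguments and take a gradient step.
    batch = dict(zip(criterion.arguments,
                     [pre_states, actions_one_hot, post_states, rewards, terminals]))
    trainer.train_minibatch(batch)
    return trainer.previous_minibatch_loss_average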
コード例 #25
0
File: learner_test.py  Project: junaidnaseer/CNTK
def test_learning_parameter_schedule(params, expectation, minibatch_size):
    l = learning_parameter_schedule(*params)
    assert l.minibatch_size == minibatch_size
    assert [l[i] for i in range(len(expectation))] == expectation
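For concreteness, one (params, expectation, minibatch_size) triple of the kind this test is parametrized with (taken from the schedule parameters that appear later in this file) plays out as:

l = learning_parameter_schedule([(3, 0.2), (2, 0.4), (1, 0.8)], 0, 5)  # minibatch_size=0, epoch_size=5
assert l.minibatch_size == 0
assert [l[i] for i in range(45)] == [0.2] * 15 + [0.4] * 10 + [0.8] * 20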
コード例 #26
0
    def __init__(self,
                 input_shape,
                 nb_actions,
                 gamma=0.99,
                 explorer=ExpEpsilonAnnealingExplorer(1, 0.1, 1000000),
                 learning_rate=0.0005,
                 momentum=0.95,
                 minibatch_size=128,
                 memory_size=500000,
                 train_after=256,
                 train_interval=2,
                 target_update_interval=10000,
                 monitor=True):
        self.input_shape = input_shape
        self.nb_actions = nb_actions
        self.gamma = gamma

        self._train_after = train_after
        self._train_interval = train_interval
        self._target_update_interval = target_update_interval

        self._explorer = explorer
        self._minibatch_size = minibatch_size
        self._memory = ReplayMemory(memory_size, input_shape)
        self._num_actions_taken = 0

        # Metrics accumulator
        self._episode_rewards, self._episode_q_means, self._episode_q_stddev = [], [], []

        # Action Value model (used by agent to interact with the environment)
        with default_options(activation=relu, init=he_uniform()):
            self._action_value_net = Sequential([
                # Convolution2D((8, 8), 16, strides=4),
                # Convolution2D((4, 4), 32, strides=2),
                # Convolution2D((3, 3), 32, strides=1),
                Dense(128, init=he_uniform()),
                Dense(128, init=he_uniform()),
                Dense(nb_actions, activation=None, init=he_uniform())
            ])
        self._action_value_net.update_signature(Tensor[input_shape])

        # Target model used to compute the target Q-values in training, updated
        # less frequently for increased stability.
        self._target_net = self._action_value_net.clone(CloneMethod.freeze)

        # Function computing Q-values targets as part of the computation graph
        @Function
        @Signature(post_states=Tensor[input_shape],
                   rewards=Tensor[()],
                   terminals=Tensor[()])
        def compute_q_targets(post_states, rewards, terminals):
            return element_select(
                terminals,
                rewards,
                gamma * reduce_max(self._target_net(post_states), axis=0) +
                rewards,
            )

        # Define the loss, using Huber Loss (more robust to outliers)
        @Function
        @Signature(pre_states=Tensor[input_shape],
                   actions=Tensor[nb_actions],
                   post_states=Tensor[input_shape],
                   rewards=Tensor[()],
                   terminals=Tensor[()])
        def criterion(pre_states, actions, post_states, rewards, terminals):
            # Compute the q_targets
            q_targets = compute_q_targets(post_states, rewards, terminals)

            # actions is a 1-hot encoding of the action done by the agent
            q_acted = reduce_sum(self._action_value_net(pre_states) * actions,
                                 axis=0)

            # Define training criterion as the Huber Loss function
            return huber_loss(q_targets, q_acted, 1.0)

        # Adam based SGD
        lr_schedule = learning_parameter_schedule(learning_rate)
        m_schedule = momentum_schedule(momentum)
        vm_schedule = momentum_schedule(0.999)
        l_sgd = adam(self._action_value_net.parameters,
                     lr_schedule,
                     momentum=m_schedule,
                     variance_momentum=vm_schedule)

        log_dir = 'metrics/' + datetime.now().strftime('%Y%m%d%H%M%S')
        self._metrics_writer = TensorBoardProgressWriter(
            freq=1, log_dir=log_dir, model=criterion) if monitor else None
        self._learner = l_sgd
        self._trainer = Trainer(criterion, (criterion, None), l_sgd,
                                self._metrics_writer)
コード例 #27
0
def test_learner_init_legacy():
    i = C.input_variable(shape=(1, ), needs_gradient=True, name='a')
    w = parameter(shape=(1, ))

    res = i * w

    # for backcompatibility test
    # this will be deprecated in future version
    learner = sgd(res.parameters,
                  lr=learning_rate_schedule(0.1, UnitType.sample))
    assert learner._learning_rate_schedule.minibatch_size == 1  # the deprecated per sample schedule should not use compatible mode
    assert learner.learning_rate() == 0.1

    # for backcompatibility test
    # this will be deprecated in future version
    # The UnitType will provide per minibatch instruction for the learner
    # this will be deprecated in future version
    learner = sgd(res.parameters,
                  lr=learning_rate_schedule(0.1, UnitType.minibatch))
    assert learner.is_compatible_mode() == False
    assert learner.learning_rate() == 0.1
    assert learner.minibatch_size == C.learners.IGNORE
    assert learner._learning_rate_schedule.minibatch_size == 0

    # for backcompatibility test, in reset learning rate, the learner won't receive the reference minibatch size from the schedule
    # user will need to specify the reference minibatch size explicitly
    # this will be deprecated in future version
    learner = sgd(res.parameters, lr=0.1)
    learner.reset_learning_rate(
        learning_rate_schedule([1, 2, 3], UnitType.minibatch))
    assert learner.learning_rate() == 1.0
    learner.minibatch_size = C.learners.IGNORE  # reset to be per minibatch
    assert learner.minibatch_size == C.learners.IGNORE
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.is_compatible_mode() == True

    # for backcompatibility test
    # this will be deprecated in future version
    learner = sgd(res.parameters,
                  lr=learning_rate_schedule(0.1, UnitType.sample),
                  minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.learning_rate() == 0.1
    assert learner.minibatch_size == C.learners.IGNORE  # the learner's reference minibatch size is still 0

    # this will be deprecated in future version: This is logical invalid combination but it was the only way to use mean gradient and set learning rate in the past.
    learner = sgd(res.parameters,
                  lr=learning_rate_schedule(0.1, UnitType.sample),
                  use_mean_gradient=True)
    assert learner.is_compatible_mode() == True
    assert learner.learning_rate() == 0.1
    #test the override in the new version
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.minibatch_size == C.learners.IGNORE  # the learner's reference minibatch size is still 0

    # for backcompatibility test
    # this will be deprecated in future version
    # The UnitType will provide per minibatch instruction for the learner
    # this will be deprecated in future version
    learner = sgd(res.parameters,
                  lr=learning_rate_schedule(0.1, UnitType.minibatch),
                  minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.learning_rate() == 0.1
    assert learner.minibatch_size == C.learners.IGNORE
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE

    # for backcompatibility test, in reset learning rate, the learner won't receive the reference minibatch size from the schedule
    # user will need to specify the reference minibatch size explicitly
    # this will be deprecated in future version
    learner = sgd(res.parameters, lr=0.1)
    learner.reset_learning_rate(
        learning_rate_schedule([1, 2, 3], UnitType.minibatch))
    assert learner.learning_rate() == 1.0
    learner.minibatch_size = C.learners.IGNORE  # reset to be per minibatch
    assert learner.minibatch_size == C.learners.IGNORE
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.is_compatible_mode() == True

    learner_parameter = learner.parameters
    from cntk.variables import Parameter
    param = learner_parameter[0]
    assert isinstance(param, Parameter)

    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    # back compatible API test
    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = learning_parameter_schedule(0.1, minibatch_size=1)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum_time_constant)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum_time_constant,
                   unit_gain_value)
    C.momentum_sgd(res.parameters,
                   lr_per_sample,
                   momentum_time_constant,
                   unit_gain=unit_gain_value)

    C.set_default_unit_gain_value(False)
    unit_gain_value = C.default_unit_gain_value()
    assert not unit_gain_value

    C.set_default_unit_gain_value(True)
    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    lr_per_sample = learning_rate_schedule([(3, 0.1), (2, 0.2), (1, 0.3)],
                                           unit=UnitType.sample)
    C.fsadagrad(res.parameters,
                lr=lr_per_sample,
                momentum=momentum_time_constant)
    C.fsadagrad(res.parameters, lr_per_sample, momentum_time_constant,
                unit_gain_value)
    C.fsadagrad(res.parameters,
                lr=lr_per_sample,
                momentum=momentum_time_constant,
                unit_gain=unit_gain_value)

    gamma, inc, dec, max, min = [0.5, 1.2, 0.7, 10, 1e-8]
    lr_per_sample = learning_rate_schedule([0.1, 0.2],
                                           unit=UnitType.sample,
                                           epoch_size=100)
    C.rmsprop(res.parameters, lr_per_sample, gamma, inc, dec, max, min, True)

    C.adadelta(res.parameters, lr_per_sample, use_mean_gradient=True)
コード例 #28
0
LR_SCHEDULE_PARAMS = [
        ((0.2, 0), [0.2], 0),
        ((0.2, 0), [0.2, 0.2, 0.2, 0.2], 0),
        (([0.2,0.4], 0, 5), [0.2]*5+[0.4]*20, 0),
        (([(3,0.2),(2,0.4),(1,0.8)], 0, 5), [0.2]*15+[0.4]*10+[0.8]*20, 0),
        ]

MOMENTUM_SCHEDULE_PARAMS = [
        ((0.2,), [0.2]),
        ((0.2,), [0.2, 0.2, 0.2, 0.2]),
        (([0.2,0.4], 5), [0.2]*5+[0.4]*20),
        (([(3,0.2),(2,0.4),(1,0.8)], 5), [0.2]*15+[0.4]*10+[0.8]*20),
        ]

LEARNER_LAMBDAS = [
    lambda params: C.adadelta(params),
    lambda params: C.adagrad(params, lr=learning_parameter_schedule(1)),
    lambda params: C.adam(params, lr=learning_parameter_schedule(1), momentum=C.momentum_schedule(0.9)),
    lambda params: C.fsadagrad(params, lr=learning_parameter_schedule(1), momentum=C.momentum_schedule(0.9)),
    lambda params: C.nesterov(params, lr=learning_parameter_schedule(1), momentum=C.momentum_schedule(0.9)),
    lambda params: C.rmsprop(params, lr=learning_parameter_schedule(1), gamma=0.1, inc=3.0, dec=0.1, max=np.inf, min=1e-8),
    lambda params: C.sgd(params, lr=learning_parameter_schedule(1)),
    lambda params: C.momentum_sgd(params, lr=learning_parameter_schedule(1), momentum=C.momentum_schedule(0.9))]
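
The parametrized test that consumes LEARNER_LAMBDAS is not shown in this excerpt; a minimal sketch of how such factories could be exercised, assuming the module-level imports (pytest, cntk as C) used elsewhere in this file (test name and body are illustrative only):

@pytest.mark.parametrize("learner_factory", LEARNER_LAMBDAS)
def test_learner_factories_sketch(learner_factory):
    w = C.parameter(shape=(1,))
    z = C.input_variable(1) * w
    learner = learner_factory(z.parameters)
    assert learner.parameters[0].shape == (1,)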

@pytest.mark.parametrize("params, expectation, minibatch_size", LR_SCHEDULE_PARAMS_LEGACY)
def test_learning_rate_schedule(params, expectation, minibatch_size):
    l = learning_rate_schedule(*params)
    assert l.minibatch_size == minibatch_size
    assert [l[i] for i in range(len(expectation))] == expectation

@pytest.mark.parametrize("params, expectation, minibatch_size", LR_SCHEDULE_PARAMS)
def test_learning_parameter_schedule(params, expectation, minibatch_size):
    l = learning_parameter_schedule(*params)
    assert l.minibatch_size == minibatch_size
    assert [l[i] for i in range(len(expectation))] == expectation
コード例 #29
0
def train_and_evaluate(reader_train,
                       reader_test,
                       network_name,
                       epoch_size,
                       max_epochs,
                       minibatch_size,
                       model_dir=None,
                       log_dir=None,
                       tensorboard_logdir=None,
                       gen_heartbeat=False,
                       fp16=False):
    """

    :param reader_train:
    :param reader_test:
    :param network_name:
    :param epoch_size:    一个epoch有多少样本
    :param max_epochs:    训练多少个epoch
    :param model_dir:
    :param log_dir:
    :param tensorboard_logdir:
    :param gen_heartbeat:
    :param fp16:
    :return:准确率,用时
    """
    set_computation_network_trace_level(0)

    # Input variables denoting the features and label data
    input_var = C.input_variable((num_channels, image_height, image_width),
                                 name='features')
    label_var = C.input_variable((num_classes))

    with C.default_options(dtype=np.float32):
        # create model, and configure learning parameters
        model = create_cifar10_model(input_var, 3, num_classes)
        # loss and metric
        loss = cross_entropy_with_softmax(model, label_var)
        error_rate = classification_error(model, label_var)

    # shared training parameters

    # Set learning parameters
    lr_per_sample = []
    check_point = [80, 120, 160, 180]
    lrs = [3e-2, 3e-3, 3e-4, 3e-4, 5e-5]
    for i in range(max_epochs + 1):
        if i in range(0, check_point[0]):
            lr_per_sample.append(lrs[0])
        if i in range(check_point[0], check_point[1]):
            lr_per_sample.append(lrs[1])
        if i in range(check_point[1], check_point[2]):
            lr_per_sample.append(lrs[2])
        if i in range(check_point[2], check_point[3]):
            lr_per_sample.append(lrs[3])
        if i > check_point[3]:
            lr_per_sample.append(lrs[4])

    lr_schedule = learning_parameter_schedule(lr_per_sample,
                                              minibatch_size=minibatch_size,
                                              epoch_size=epoch_size)
    mm_schedule = momentum_schedule(0.9, minibatch_size)  # momentum

    # progress writers
    progress_writers = [
        ProgressPrinter(tag='Training',
                        num_epochs=max_epochs,
                        gen_heartbeat=gen_heartbeat)
    ]
    tensorboard_writer = None
    if tensorboard_logdir is not None:
        tensorboard_writer = TensorBoardProgressWriter(
            freq=10, log_dir=tensorboard_logdir, model=model)
        progress_writers.append(tensorboard_writer)

    # trainer object
    l2_reg_weight = 0.0001
    learner = adam(model.parameters,
                   lr_schedule,
                   mm_schedule,
                   l2_regularization_weight=l2_reg_weight)
    trainer = Trainer(model, (loss, error_rate), learner, progress_writers)

    # define mapping from reader streams to network inputs
    input_map = {
        input_var: reader_train.streams.features,
        label_var: reader_train.streams.labels
    }

    log_number_of_parameters(model)
    print("*********Training Start*********")
    start = time.clock()
    for epoch in range(max_epochs):  # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = reader_train.next_minibatch(
                min(minibatch_size, epoch_size - sample_count),
                input_map=input_map)  # fetch minibatch.
            trainer.train_minibatch(data)  # update model with it
            sample_count += trainer.previous_minibatch_sample_count  # count samples processed so far

        trainer.summarize_training_progress()

        # Log mean of each parameter tensor, so that we can confirm that the parameters change indeed.
        if tensorboard_writer:
            for parameter in model.parameters:
                tensorboard_writer.write_value(parameter.uid + "/mean",
                                               reduce_mean(parameter).eval(),
                                               epoch)

        if model_dir:
            model.save(
                os.path.join(model_dir,
                             network_name + "_{}.dnn".format(epoch)))
        enable_profiler()  # begin to collect profiler data after first epoch

    # Evaluation parameters
    test_epoch_size = 10000
    minibatch_size = 32

    # process minibatches and evaluate the model
    metric_numer = 0
    metric_denom = 0
    sample_count = 0

    while sample_count < test_epoch_size:
        current_minibatch = min(minibatch_size, test_epoch_size - sample_count)
        # Fetch the next test minibatch.
        data = reader_test.next_minibatch(current_minibatch,
                                          input_map=input_map)
        # minibatch data to be trained with
        metric_numer += trainer.test_minibatch(data) * current_minibatch
        metric_denom += current_minibatch
        # Keep track of the number of samples processed so far.
        sample_count += data[label_var].num_samples

    print("")
    trainer.summarize_test_progress()
    print("")
    elapsed = (time.clock() - start)
    return 1 - metric_numer / metric_denom, elapsed
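train_and_evaluate relies on module-level constants and imports (num_channels, image_height, image_width, num_classes, np, time, os, among others) defined elsewhere in the script; for CIFAR-10 the image and class constants would typically be:

import os
import time
import numpy as np

num_channels, image_height, image_width = 3, 32, 32   # CIFAR-10 images
num_classes = 10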