Code Example #1
File: learner_test.py Project: junaidnaseer/CNTK
def test_noise_injection_with_checkpointing():
    from cntk import initializer
    shape = (100,100)

    w1 = parameter(shape=shape, init=initializer.glorot_uniform(seed=123))
    w2 = parameter(shape=shape, init=initializer.glorot_uniform(seed=123))
    w3 = parameter(shape=shape, init=initializer.glorot_uniform(seed=123))

    lr=learning_rate_schedule(0.5, UnitType.sample)
    m=C.momentum_schedule(0.99)

    learner1 = C.momentum_sgd([w1], lr, m, gaussian_noise_injection_std_dev=0.5)
    learner2 = C.momentum_sgd([w2], lr, m, gaussian_noise_injection_std_dev=0.5)
    learner3 = C.momentum_sgd([w3], lr, m, gaussian_noise_injection_std_dev=0.5)

    assert np.allclose(w1.value, w2.value) and np.allclose(w1.value, w3.value)

    for i in range(10):
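        # The noise injection draws from each learner's own RNG stream, so learner1
        # and learner2 diverge even with identical gradients, while learner3, restored
        # from learner1's checkpoint (which captures that RNG state), tracks learner1 exactly.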
        checkpoint = learner1.create_checkpoint()

        v =  np.float32(np.random.rand(100,100))

        learner1.update({w1: v}, 1)
        learner2.update({w2: v}, 1)
        assert not np.allclose(w1.value, w2.value)

        learner3.restore_from_checkpoint(checkpoint)
        learner3.update({w3: v}, 1)
        assert np.allclose(w1.value, w3.value)
Code Example #2
    def create_trainer(self):
        try:
            p = self.output.parameters
            # Three of four parameters are learned by block_momentum_distributed_learner.
            bmd_learner = cntk.block_momentum_distributed_learner(
                cntk.momentum_sgd(
                    [p[0], p[1], p[2]],
                    cntk.learning_parameter_schedule(0.0001),
                    cntk.momentum_as_time_constant_schedule(1000)),
                block_size=1000,
                block_learning_rate=0.01,
                block_momentum_as_time_constant=1000)

            # New API to mark which learner is to be used for metric aggregation.
            bmd_learner.set_as_metric_aggregator()

            # The last parameter is learned by the data_parallel_distributed_learner.
            momentum_schedule = cntk.momentum_schedule_per_sample(
                0.9990913221888589)
            lr_per_sample = cntk.learning_parameter_schedule_per_sample(0.007)
            dpd_learner = cntk.data_parallel_distributed_learner(
                cntk.momentum_sgd([p[3]], lr_per_sample, momentum_schedule,
                                  True))

            comm_rank = cntk.distributed.Communicator.rank()
            self.trainer = cntk.Trainer(
                self.output, (self.ce, self.err), [bmd_learner, dpd_learner], [
                    cntk.logging.ProgressPrinter(
                        freq=progress_freq, tag="Training", rank=comm_rank)
                ])
        except RuntimeError:
            self.trainer = None
        return
Code Example #3
def test_noise_injection_with_checkpointing():
    from cntk import initializer
    shape = (100,100)
    
    w1 = parameter(shape=shape, init=initializer.glorot_uniform(seed=123))
    w2 = parameter(shape=shape, init=initializer.glorot_uniform(seed=123))
    w3 = parameter(shape=shape, init=initializer.glorot_uniform(seed=123))
    
    lr=learning_rate_schedule(0.5, UnitType.sample)
    m=C.momentum_schedule(0.99)

    learner1 = C.momentum_sgd([w1], lr, m, gaussian_noise_injection_std_dev=0.5)
    learner2 = C.momentum_sgd([w2], lr, m, gaussian_noise_injection_std_dev=0.5)
    learner3 = C.momentum_sgd([w3], lr, m, gaussian_noise_injection_std_dev=0.5)

    assert np.allclose(w1.value, w2.value) and np.allclose(w1.value, w3.value)

    for i in range(10):
        checkpoint = learner1.create_checkpoint()

        v =  np.float32(np.random.rand(100,100))
    
        learner1.update({w1: v}, 1)
        learner2.update({w2: v}, 1)
        assert not np.allclose(w1.value, w2.value)

        learner3.restore_from_checkpoint(checkpoint)
        learner3.update({w3: v}, 1)
        assert np.allclose(w1.value, w3.value)
Code Example #4
def test_trainer(tmpdir, no_eval_function):
    in1 = C.input_variable(shape=(1,))
    labels = C.input_variable(shape=(1,))
    p = parameter(shape=(2,), init=10)
    z = plus(in1, reduce_sum(p), name='z')
    ce = cross_entropy_with_softmax(z, labels)
    if no_eval_function:
        errs = None
    else:
        errs = classification_error(z, labels)

    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
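    # a time constant of 1100 samples corresponds to a per-sample momentum of exp(-1/1100) ~= 0.99909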
    lr_per_sample = C.learning_parameter_schedule(0.007, minibatch_size =1)
    trainer = C.Trainer(z, (ce, errs),
            [C.momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant, True)])
    in1_value = [[1],[2]]
    label_value = [[0], [1]]
    arguments = {in1: in1_value, labels: label_value}
    z_output = z.output
    updated, var_map = trainer.train_minibatch(arguments, outputs=[z_output])

    p = str(tmpdir / 'checkpoint.dat')
    external_state = {"additional external state":math.pi, "nested dict":{"a":"b"}, "list":[1,2,3]}
    trainer.save_checkpoint(p, external_state)
    restored_state = trainer.restore_from_checkpoint(p)

    assert external_state == restored_state

    assert trainer.model.name == 'z'

    # Ensure that Swig is not leaking raw types
    assert isinstance(trainer.model, Function)
    assert trainer.model.__doc__
    assert isinstance(trainer.parameter_learners[0], C.Learner)
Code Example #5
def test_learner_logging():
    from cntk import Trainer
    from cntk.logging import ProgressPrinter
    from cntk import cross_entropy_with_softmax, classification_error

    features = C.input_variable(shape=(1,), needs_gradient=True, name='a')
    w_init = 1
    w = parameter(shape=(1,), init=w_init)
    z = features * w
    labels = C.input_variable(shape=(1,), name='b')
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)

    writer = TestProgressWriter()
    lr_values = [0.3, 0.2, 0.1, 0]
    m_values = [0.6, 0.7, 0.8]
    learner = C.momentum_sgd(z.parameters,
                  learning_rate_schedule(lr_values, UnitType.sample, 1),
                  C.momentum_schedule(m_values, 1))
    trainer = Trainer(z, (ce, errs), [learner], writer)

    for i in range(10):
        trainer.train_minibatch({features: [[2.]], labels: [[1.]]})
    
    assert len(writer.log_output) == len(lr_values + m_values)

    values = [j for i in zip(lr_values,m_values) for j in i] + [0]

    for i in range(len(values)):
        assert (values[i] == writer.log_output[i])
Code Example #6
def create_network(para, verbose=False):
    with cntk.layers.default_options(init=cntk.glorot_uniform(), activation=cntk.ops.relu):
        # To speed up debugging, we use a simple structure with only a few tunable hyperparameters (para)

        h = cntk.layers.Convolution2D(filter_shape=(5, 5), num_filters=para[0],
                                      strides=(1, 1), pad=True, name='C1')(network_input / 255.0)
        h = cntk.layers.MaxPooling(filter_shape=(5, 5), strides=(2, 2))(h)

        h = cntk.layers.Convolution2D(filter_shape=(5, 5), num_filters=para[1],
                                      strides=(1, 1), pad=True, name='C2')(h)
        h = cntk.layers.MaxPooling(filter_shape=(5, 5), strides=(2, 2))(h)

        h = cntk.layers.Convolution2D(filter_shape=(3, 3), num_filters=para[2],
                                      strides=(1, 1), pad=True, name='C3')(h)

        h = cntk.layers.Dense(para[3])(h)

        h = cntk.layers.Dropout(0.25)(h)

        z = cntk.layers.Dense(10, activation=None, name='R')(h)
    loss = cntk.cross_entropy_with_softmax(z, network_label)
    label_error = cntk.classification_error(z, network_label)
    lr_schedule = cntk.learning_rate_schedule(0.1, cntk.UnitType.minibatch)
    learner = cntk.momentum_sgd(z.parameters, lr_schedule, cntk.momentum_schedule(0.9))
    trainer = cntk.Trainer(z, (loss, label_error), [learner])
    if verbose: log = cntk.logging.ProgressPrinter(100)
    for _ in xrange(20000):
        data = train_reader.next_minibatch(100, input_map=mapping(train_reader))
        trainer.train_minibatch(data)
        if verbose: log.update_with_trainer(trainer)
    return trainer
Code Example #7
def init_model(m):
    progress_writers = [
        cntk.logging.ProgressPrinter(
            freq=int(BATCHSIZE / 2),
            rank=cntk.train.distributed.Communicator.rank(),
            num_epochs=EPOCHS)
    ]

    # Loss (dense labels); TODO: check whether sparse labels are supported
    loss = cntk.cross_entropy_with_softmax(m, labels)
    # Momentum SGD
    # https://github.com/Microsoft/CNTK/blob/master/Manual/Manual_How_to_use_learners.ipynb
    # unit_gain=False: momentum_direction = momentum*old_momentum_direction + gradient
    # unit_gain=True:  momentum_direction = momentum*old_momentum_direction + (1-momentum)*gradient
    local_learner = cntk.momentum_sgd(
        m.parameters,
        lr=cntk.learning_rate_schedule(LR, cntk.UnitType.minibatch),
        momentum=cntk.momentum_schedule(MOMENTUM),
        unit_gain=False)

    distributed_learner = cntk.train.distributed.data_parallel_distributed_learner(
        local_learner)

    trainer = cntk.Trainer(m, (loss, cntk.classification_error(m, labels)),
                           [distributed_learner], progress_writers)

    return trainer, distributed_learner
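
The unit_gain comments in the example above contrast CNTK's two momentum formulations. Below is a minimal NumPy-only sketch of the two update rules for a single step; it is illustrative only (the function name momentum_step is made up and is not part of the CNTK API):

import numpy as np

def momentum_step(m_prev, grad, momentum=0.9, unit_gain=False):
    # unit_gain=False: classic momentum, m = momentum * m_prev + grad
    # unit_gain=True:  unit-gain momentum, m = momentum * m_prev + (1 - momentum) * grad
    if unit_gain:
        return momentum * m_prev + (1.0 - momentum) * grad
    return momentum * m_prev + grad

# For the same gradient, the unit-gain form keeps the step on the scale of the
# gradient, while the classic form accumulates up to roughly 1/(1 - momentum) times it.
print(momentum_step(np.zeros(1), np.ones(1)))                  # [1.]
print(momentum_step(np.zeros(1), np.ones(1), unit_gain=True))  # [0.1]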
Code Example #8
File: cntk_bisru_fast.py Project: iloveat/study_DL
    def __init__(self, n_in, n_out, init_lr, momentum):

        self.param1 = 512
        self.param2 = 256

        self.n_in = int(n_in)
        self.n_out = int(n_out)
        self.input = C.sequence.input_variable(shape=(self.n_in,))
        self.label = C.sequence.input_variable(shape=(self.n_out,))

        self.three_dnn = C.layers.Sequential([
            C.layers.Dense(self.param1, activation=C.tanh, name='dnn_three_1'),
            C.layers.Dense(self.param1, activation=C.tanh, name='dnn_three_2'),
            C.layers.Dense(self.param1, activation=C.tanh, name='dnn_three_3')])
        self.final_dnn = C.layers.Dense(self.n_out, name='dnn_final')
        self.dnn_1 = C.layers.Dense(8 * self.param2, bias=False, name='dnn_1')
        self.dnn_2 = C.layers.Dense(8 * self.param2, bias=False, name='dnn_2')
        self.dnn_3 = C.layers.Dense(8 * self.param2, bias=False, name='dnn_3')
        self.dnn_4 = C.layers.Dense(8 * self.param2, bias=False, name='dnn_4')
        self.list_bias = []
        for i in xrange(16):
            self.list_bias.append(C.parameter(shape=(self.param2, ), name='bias_' + str(i)))

        self.output = self.model(self.input)

        self.loss = loss_fun(self.output, self.label)
        self.eval_err = loss_fun(self.output, self.label)

        self.lr_s = C.learning_rate_schedule(init_lr, C.UnitType.sample)
        self.mom_s = C.momentum_schedule(momentum)
        self.learner = C.momentum_sgd(self.output.parameters, lr=self.lr_s, momentum=self.mom_s)
        self.trainer = C.Trainer(self.output, (self.loss, self.eval_err), [self.learner])
Code Example #9
File: learner_test.py Project: junaidnaseer/CNTK
def test_learner_logging():
    from cntk import Trainer
    from cntk.logging import ProgressPrinter
    from cntk import cross_entropy_with_softmax, classification_error

    features = C.input_variable(shape=(1,), needs_gradient=True, name='a')
    w_init = 1
    w = parameter(shape=(1,), init=w_init)
    z = features * w
    labels = C.input_variable(shape=(1,), name='b')
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)

    writer = TestProgressWriter()
    lr_values = [0.3, 0.2, 0.1, 0]
    m_values = [0.6, 0.7, 0.8]
    learner = C.momentum_sgd(z.parameters,
                  learning_rate_schedule(lr_values, UnitType.sample, 1),
                  C.momentum_schedule(m_values, 1))
    trainer = Trainer(z, (ce, errs), [learner], writer)

    for i in range(10):
        trainer.train_minibatch({features: [[2.]], labels: [[1.]]})

    assert len(writer.log_output) == len(lr_values + m_values)

    values = [j for i in zip(lr_values,m_values) for j in i] + [0]

    for i in range(len(values)):
        assert (values[i] == writer.log_output[i])
Code Example #10
 def create_trainer(self):
     try:
         learner = cntk.block_momentum_distributed_learner(cntk.momentum_sgd(self.output.parameters, cntk.learning_parameter_schedule(0.0001), cntk.momentum_as_time_constant_schedule(1000)), 
                                                           block_size=1000, block_learning_rate=0.01, block_momentum_as_time_constant=1000)
         
         comm_rank = cntk.distributed.Communicator.rank()
         self.trainer = cntk.Trainer(self.output, (self.ce, self.err), [learner], [cntk.logging.ProgressPrinter(freq=progress_freq, tag="Training", rank=comm_rank)])
     except RuntimeError:
         self.trainer = None
     return
Code Example #11
def fineTuneModel(folder_with_data,path_to_label_csv="label.csv",
    original_model_path="../vgg13.model",max_epochs=10):

    trainingValues = getData(folder_with_data,path_to_label_csv)

    input_var =ct.input((1,height,width),np.float32)
    label_var = ct.input((num_classes), np.float32)
    print("cloning old model")
    z = clone_model(original_model_path,input_var)
    loss = ct.cross_entropy_with_softmax(z, label_var)
    metric = ct.classification_error(z, label_var) 

    minibatch_size = 32
    epoch_size = trainingValues.getLengthOfData()

    lr_per_minibatch = [learning_rate]*10+[learning_rate/2.0]
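    # -minibatch_size/log(0.9) is the time constant whose per-sample momentum compounds to 0.9 over one 32-sample minibatch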
    mm_time_constant = -minibatch_size/np.log(0.9)
    lr_schedule = ct.learning_rate_schedule(lr_per_minibatch,
        unit=ct.UnitType.minibatch, epoch_size=epoch_size)
    mm_schedule = ct.momentum_as_time_constant_schedule(mm_time_constant)

    learner = ct.momentum_sgd(z.parameters, lr_schedule, mm_schedule)
    trainer = ct.Trainer(z, (loss, metric), learner)
    print("created trainer and learner")

    print("training started")
    epoch = 0
    while epoch < max_epochs:

        trainingValues.reset() 
        # Training 
        start_time = time.time()
        training_loss = 0
        training_accuracy = 0

        #mini-batch learning
        while trainingValues.hasMoreMinibatches():
            #while there is data for a mini batch:
            x,y,currBatchSize = trainingValues.getNextMinibatch(minibatch_size)
            # x - images y - labels/emotions
            trainer.train_minibatch({ input_var : x, label_var: y})

            #maintain stats:
            training_loss += trainer.previous_minibatch_loss_average *    currBatchSize
            training_accuracy += trainer.previous_minibatch_evaluation_average * currBatchSize
            
        training_accuracy /= trainingValues.getLengthOfData()
        training_accuracy = 1.0 - training_accuracy

        print("Epoch took:", time.time() - start_time, "seconds")
        print("training accuracy:\t\t{:.2f}%".format(training_accuracy*100))

        epoch +=1

    #SAVE MODEL
    z.save("../vgg13.model")
Code Example #12
def finalize_network(reader, model_details, max_amount_of_epochs,
                     samples_per_epoch, samples_per_minibatch,
                     pixel_dimensions, classes, learning_rate):
    features = input_variable(shape=(pixel_dimensions['depth'],
                                     pixel_dimensions['height'],
                                     pixel_dimensions['width']))
    label = input_variable(shape=len(classes))

    # speeds up training
    normalized_features = element_times(1.0 / 256.0, features)

    model = create_tf_model(model_details,
                            num_classes=len(classes),
                            input_features=normalized_features,
                            freeze=True)

    loss = cross_entropy_with_softmax(model, label)
    metric = classification_error(model, label)
    learner = momentum_sgd(parameters=model.parameters,
                           lr=learning_rate_schedule(learning_rate,
                                                     UnitType.minibatch),
                           momentum=0.9,
                           l2_regularization_weight=0.0005)

    reporter = ProgressPrinter(tag='training', num_epochs=max_amount_of_epochs)

    trainer = Trainer(model=model,
                      criterion=(loss, metric),
                      parameter_learners=[learner],
                      progress_writers=[reporter])

    log_number_of_parameters(model)

    map_input_to_streams_train = {
        features: reader.streams.features,
        label: reader.streams.labels
    }

    training_session(trainer=trainer,
                     mb_source=reader,
                     model_inputs_to_streams=map_input_to_streams_train,
                     mb_size=samples_per_minibatch,
                     progress_frequency=samples_per_epoch,
                     checkpoint_config=CheckpointConfig(
                         frequency=samples_per_epoch,
                         filename=os.path.join("./checkpoints",
                                               "ConvNet_Lego_VisiOn"),
                         restore=True)).train()
    network = {'features': features, 'label': label, 'model': softmax(model)}
    model_name = f"CNN-3200-224-resnet-18.model"
    export_path = os.path.abspath(
        os.path.join("..", "..", "Final models", "CNN", model_name))
    model.save(export_path)
    return network
Code Example #13
def create_learner(model):
    '''Create the optimized method'''
    optim = "momentum_sgd"
    lr = 0.001
    lr_per_sample = C.learning_parameter_schedule_per_sample(lr)
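    # 0.9990913221888589 == exp(-1/1100), i.e. a per-sample momentum with a time constant of 1100 samples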
    momentum_schedule = C.momentum_schedule_per_sample(0.9990913221888589)
    if optim == 'momentum_sgd':
        clipping_threshold_per_sample = 5.0
        gradient_clipping_with_truncation = True
        return C.momentum_sgd(model.parameters, lr_per_sample, momentum_schedule,
                              gradient_clipping_threshold_per_sample=clipping_threshold_per_sample,
                              gradient_clipping_with_truncation=gradient_clipping_with_truncation)
Code Example #14
File: model.py Project: whaozl/CuteChineseTTS
 def __init__(self, dim_x, dim_y):
     self.dim_x = int(dim_x)
     self.dim_y = int(dim_y)
     self.input = cntk.sequence.input_variable(shape=(self.dim_x, ))
     self.label = cntk.sequence.input_variable(shape=(self.dim_y, ))
     self.output = self.model(self.input)
     self.loss = loss_fun(self.output, self.label)
     self.eval = loss_fun(self.output, self.label)
     self.learner = cntk.momentum_sgd(parameters=self.output.parameters,
                                      momentum=cntk.momentum_schedule(0.5),
                                      lr=cntk.learning_rate_schedule(0.006, cntk.UnitType.sample))
     self.trainer = cntk.Trainer(self.output, (self.loss, self.eval), [self.learner])
Code Example #15
    def modelInit(self):

        #create output model folder:
        self.output_model_folder = os.path.join(self.base_folder, R'models')
        if not os.path.exists(self.output_model_folder):
            os.makedirs(self.output_model_folder)

        self.model = VGG13(self.num_classes)
        self.input_var = ct.input(
            (1, self.model.input_height, self.model.input_width), np.float32)
        self.label_var = ct.input((self.num_classes), np.float32)

        print("initialized model")

        self.genData()
        # ct.input_variable takes the number of dimensions and automatically creates
        # one-hot encoded labels; ct.input doesn't.

        # criterion of the model: loss, metric:
        #loss = cross_entropy_with_softmax
        #metric = classification error
        self.z = self.model.model(self.input_var)
        loss = ct.cross_entropy_with_softmax(self.z, self.label_var)
        metric = ct.classification_error(self.z, self.label_var)
        """
        pred = ct.softmax(z)
        loss = ct.negate(ct.reduce_sum(ct.element_times(label_var, ct.log(pred)), axis=-1)) 
        """
        minibatch_size = 32
        epoch_size = self.trainingValues.getLengthOfData()

        #THROW MOMENTUM:
        lr_per_minibatch = [self.model.learning_rate
                            ] * 20 + [self.model.learning_rate / 2.0] * 20 + [
                                self.model.learning_rate / 10.0
                            ]
        # use eta for 20 epochs, then eta/2 for the next 20 epochs, then eta/10 for the remaining epochs
        mm_time_constant = -minibatch_size / np.log(0.9)
        lr_schedule = ct.learning_rate_schedule(lr_per_minibatch,
                                                unit=ct.UnitType.minibatch,
                                                epoch_size=epoch_size)
        mm_schedule = ct.momentum_as_time_constant_schedule(mm_time_constant)

        # construct the trainer
        #learner performs model updates. can be adam() or sgd()
        learner = ct.momentum_sgd(self.z.parameters, lr_schedule, mm_schedule)
        # The Trainer optimizes the loss by SGD, and logs the metric
        self.trainer = ct.Trainer(self.z, (loss, metric), learner)

        print("created trainer and learner")
Code Example #16
    def create_trainer(self):
        learner = cntk.block_momentum_distributed_learner(
            cntk.momentum_sgd(self.output.parameters,
                              cntk.learning_parameter_schedule(0.0001),
                              cntk.momentum_as_time_constant_schedule(1000)),
            block_size=1000,
            block_learning_rate=0.01,
            block_momentum_as_time_constant=1000)

        comm_rank = cntk.distributed.Communicator.rank()
        self.trainer = cntk.Trainer(
            self.output, (self.ce, self.err), [learner], [
                cntk.logging.ProgressPrinter(
                    freq=progress_freq, tag="Training", rank=comm_rank)
            ])
Code Example #17
def test_output_to_retain():
    in1 = C.input_variable(shape=(1,))
    labels = C.input_variable(shape=(1,))
    p = parameter(shape=(2,), init=10)
    z = plus(in1, reduce_sum(p), name='z')
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)
    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = C.learning_parameter_schedule(0.007, minibatch_size =1)
    trainer = C.Trainer(z, (ce, errs),
            [C.momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant, True)])
    in1_value = [[1], [2]]
    label_value = [[0], [1]]
    arguments = {in1: in1_value, labels: label_value}
    z_output = z.output
    updated, var_map = trainer.train_minibatch(arguments, outputs=[z_output])
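    # p is a 2-element parameter initialized to 10, so reduce_sum(p) == 20 and the
    # retained output equals in1 + 20 (the forward pass runs before the parameter update)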
    assert np.allclose(var_map[z_output], np.asarray(in1_value)+20)
Code Example #18
def test_ext_train(tmpdir):
    dim = 4

    p = C.parameter(shape=(dim, ), init=10)
    i = C.sequence.input_variable(dim, needs_gradient=True, name='i_var')
    m = MyPlus(i, C.constant(3), 'my_plus')
    # keeping m unwrapped since we need to access its member variables
    z = C.user_function(m) + p

    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = C.learning_parameter_schedule(0.007, minibatch_size=1)
    trainer = C.Trainer(z, (z + 0, z + 0), [
        C.momentum_sgd(z.parameters,
                       lr_per_sample,
                       momentum_time_constant,
                       True,
                       minibatch_size=0)
    ])

    i = 0
    while i < 100:
        i += 1
        input_data = np.random.rand(dim)
        trainer.train_minibatch([input_data])

    assert m.forward_calls == m.backward_calls == 100

    filepath = str(tmpdir / 'test_ext_train.dat')

    z.save(filepath)

    buf = open(filepath, 'rb').read()

    # this is only needed for Python 2.7
    # (which does not distinguish between bytes and strings)
    if isinstance(buf, str):
        buf = bytearray(buf)

    z1 = Function.load(buf)

    m1 = z1.find_by_name('my_plus')
    # m1 is an instance of UserFunction, cannot directly downcast it to MyPlus,
    # using serialize as workaround:
    state = m1.serialize()['state']

    assert state['forward_calls'] == state['backward_calls'] == 100
Code Example #19
def test_ext_lambdafunc(tmpdir):
    dim = 4

    class CallbackCounter(object):
        def __init__(self):
            self.count = 0

        def inc(self, arg):
            self.count += 1

    cb = CallbackCounter()

    p = C.parameter(shape=(dim,), init=1)
    i = C.input_variable(dim, needs_gradient=True, name='i_var')
    k = i * p
    m = LambdaFunc(k,
                   when=lambda arg: np.sum(arg) > 1,
                   execute=cb.inc)
    m = C.user_function(m)
    z0 = m + 0

    filepath = str(tmpdir / 'test_ext_lambdafunc.dat')
    z0.save(filepath)

    Function.register_udf_deserialize_callback('conditional_exec_lambda',
                                               lambda x, *unused: LambdaFunc(x, when=lambda arg: np.sum(arg) > 1, execute=cb.inc))

    z = Function.load(filepath)

    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = C.learning_parameter_schedule(0.007, minibatch_size = 1)
    trainer = C.Trainer(z, (z + 0, z + 0), [C.momentum_sgd(z.parameters,
                                                           lr_per_sample,
                                                           momentum_time_constant,
                                                           True)])

    i = 0
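    # 0.1 * ones(4) sums to 0.4, below the lambda's threshold of 1, so the callback
    # must not fire; 0.3 * ones(4) sums to 1.2, which should trigger it exactly once.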
    input_data = 0.1 * np.ones(dim)
    trainer.train_minibatch([input_data])
    assert cb.count == 0

    input_data = 0.3 * np.ones(dim)
    trainer.train_minibatch([input_data])
    assert cb.count == 1
Code Example #20
File: userfunction_test.py Project: vnvizitiu/CNTK
def test_ext_lambdafunc(tmpdir):
    dim = 4

    class CallbackCounter(object):
        def __init__(self):
            self.count = 0

        def inc(self, arg):
            self.count += 1

    cb = CallbackCounter()

    p = C.parameter(shape=(dim,), init=1)
    i = C.input_variable(dim, needs_gradient=True, name='i_var')
    k = i * p
    m = LambdaFunc(k,
                   when=lambda arg: np.sum(arg) > 1,
                   execute=cb.inc)
    m = C.user_function(m)
    z0 = m + 0

    filepath = str(tmpdir / 'test_ext_lambdafunc.dat')
    z0.save(filepath)

    Function.register_udf_deserialize_callback('conditional_exec_lambda',
                                               lambda x, *unused: LambdaFunc(x, when=lambda arg: np.sum(arg) > 1, execute=cb.inc))

    z = Function.load(filepath)

    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = C.learning_rate_schedule(0.007, C.UnitType.sample)
    trainer = C.Trainer(z, (z + 0, z + 0), [C.momentum_sgd(z.parameters,
                                                           lr_per_sample,
                                                           momentum_time_constant,
                                                           True)])

    i = 0
    input_data = 0.1 * np.ones(dim)
    trainer.train_minibatch([input_data])
    assert cb.count == 0

    input_data = 0.3 * np.ones(dim)
    trainer.train_minibatch([input_data])
    assert cb.count == 1
Code Example #21
File: distributed_test.py Project: psccfund/pscc
def run_distributed_training(tmpdir, create_func):

    in1 = sequence.input_variable(shape=1)
    labels = sequence.input_variable(shape=1)
    p = parameter(shape=2, init=10)
    z = plus(in1, reduce_sum(p), name='z')
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)

    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = C.learning_rate_schedule(0.007, C.UnitType.sample)
    dist_learner = create_func(
        C.momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant,
                       True))

    communicator = dist_learner.communicator()
    workers = communicator.workers()
    current_worker = communicator.current_worker()
    found_rank = False
    for wk in workers:
        if current_worker.global_rank == wk.global_rank:
            found_rank = True

    assert found_rank

    trainer = C.Trainer(z, (ce, errs), [dist_learner])
    in1_value = [[1], [2]]
    label_value = [[0], [1]]
    arguments = {in1: in1_value, labels: label_value}
    z_output = z.output
    updated, var_map = trainer.train_minibatch(arguments, outputs=[z_output])

    p = str(tmpdir / 'checkpoint.dat')
    trainer.save_checkpoint(p)
    trainer.restore_from_checkpoint(p)

    communicator.barrier()

    assert trainer.model.name == 'z'

    # Ensure that Swig is not leaking raw types
    assert isinstance(trainer.model, Function)
    assert trainer.model.__doc__
Code Example #22
File: userfunction_test.py Project: vnvizitiu/CNTK
def test_ext_train(tmpdir):
    dim = 4

    p = C.parameter(shape=(dim,), init=10)
    i = C.sequence.input_variable(dim, needs_gradient=True, name='i_var')
    m = MyPlus(i, C.constant(3), 'my_plus')
    # keeping m unwrapped since we need to access its member variables
    z = C.user_function(m) + p

    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = C.learning_rate_schedule(0.007, C.UnitType.sample)
    trainer = C.Trainer(z, (z + 0, z + 0),
                        [C.momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant,
                                        True)])

    i = 0
    while i < 100:
        i += 1
        input_data = np.random.rand(dim)
        trainer.train_minibatch([input_data])

    assert m.forward_calls == m.backward_calls == 100

    filepath = str(tmpdir / 'test_ext_train.dat')

    z.save(filepath)

    buf = open(filepath, 'rb').read()

    # this is only needed for Python 2.7
    # (which does not distinguish between bytes and strings)
    if isinstance(buf, str):
        buf = bytearray(buf)

    z1 = Function.load(buf)

    m1 = z1.find_by_name('my_plus')
    # m1 is an instance of UserFunction, cannot directly downcast it to MyPlus,
    # using serialize as workaround:
    state = m1.serialize()['state']

    assert state['forward_calls'] == state['backward_calls'] == 100
Code Example #23
File: distributed_test.py Project: OlegBoulanov/CNTK
def run_distributed_training(tmpdir, create_func):

    in1 = sequence.input_variable(shape=1)
    labels = sequence.input_variable(shape=1)
    p = parameter(shape=2, init=10)
    z = plus(in1, reduce_sum(p), name='z')
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)

    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = C.learning_rate_schedule(0.007, C.UnitType.sample)
    dist_learner = create_func(C.momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant, True))

    communicator = dist_learner.communicator()
    workers = communicator.workers()
    current_worker = communicator.current_worker()
    found_rank = False
    for wk in workers:
        if current_worker.global_rank == wk.global_rank:
            found_rank = True

    assert found_rank

    trainer = C.Trainer(z, (ce, errs), [ dist_learner ])
    in1_value = [[1],[2]]
    label_value = [[0], [1]]
    arguments = {in1: in1_value, labels: label_value}
    z_output = z.output
    updated, var_map = trainer.train_minibatch(arguments, outputs=[z_output])
    
    p = str(tmpdir / 'checkpoint.dat')
    trainer.save_checkpoint(p)
    trainer.restore_from_checkpoint(p)

    communicator.barrier()

    assert trainer.model.name == 'z'

    # Ensure that Swig is not leaking raw types
    assert isinstance(trainer.model, Function)
    assert trainer.model.__doc__
Code Example #24
def create_trainer(network, minibatch_size, epoch_size, progress_printer):
    """ Create trainer 
    """

    # Set learning parameters
    lr_per_sample = [0.0015625] * 10 + [0.00046875] * 10 + [0.00015625]
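    # with epoch_size set below, each list entry applies for one epoch:
    # 0.0015625 for epochs 1-10, 0.00046875 for epochs 11-20, then 0.00015625 for the rest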
    momentum_time_constant = [0] * 20 + [-minibatch_size / np.log(0.9)]
    l2_reg_weight = 0.002

    lr_schedule = learning_rate_schedule(lr_per_sample,
                                         epoch_size=epoch_size,
                                         unit=UnitType.sample)
    mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant)

    learner = momentum_sgd(network['output'].parameters,
                           lr_schedule,
                           mm_schedule,
                           l2_regularization_weight=l2_reg_weight)

    return Trainer(network['output'], (network['ce'], network['pe']), learner,
                   progress_printer)
Code Example #25
    def __init__(self, n_in, n_out, init_lr, momentum):

        self.param1 = 512
        self.param2 = 256

        self.n_in = int(n_in)
        self.n_out = int(n_out)
        self.input = C.sequence.input_variable(shape=(self.n_in, ))
        self.label = C.sequence.input_variable(shape=(self.n_out, ))

        self.three_dnn = Sequential([
            Dense(self.param1, activation=C.tanh),
            Dense(self.param1, activation=C.tanh),
            Dense(self.param1, activation=C.tanh)
        ])
        self.rnn_layer1 = Sequential([(Recurrence(LSTM(self.param2)),
                                       Recurrence(LSTM(self.param2),
                                                  go_backwards=True)),
                                      C.splice])
        self.rnn_layer2 = Sequential([(Recurrence(LSTM(self.param2)),
                                       Recurrence(LSTM(self.param2),
                                                  go_backwards=True)),
                                      C.splice])
        self.final_dnn = Dense(self.n_out)

        self.output = self.model(self.input)

        self.loss = loss_fun(self.output, self.label)
        self.eval_err = loss_fun(self.output, self.label)

        self.lr_s = C.learning_rate_schedule(init_lr, C.UnitType.sample)
        self.mom_s = C.momentum_schedule(momentum)
        self.learner = C.momentum_sgd(self.output.parameters,
                                      lr=self.lr_s,
                                      momentum=self.mom_s)
        self.trainer = C.Trainer(self.output, (self.loss, self.eval_err),
                                 [self.learner])
Code Example #26
File: train.py Project: rnrneverdies/FERPlus
def main(base_folder, training_mode='majority', model_name='VGG13', max_epochs = 100):

    # create needed folders.
    output_model_path   = os.path.join(base_folder, R'models')
    output_model_folder = os.path.join(output_model_path, model_name + '_' + training_mode)
    if not os.path.exists(output_model_folder):
        os.makedirs(output_model_folder)

    # creating logging file 
    logging.basicConfig(filename = os.path.join(output_model_folder, "train.log"), filemode = 'w', level = logging.INFO)
    logging.getLogger().addHandler(logging.StreamHandler())

    logging.info("Starting with training mode {} using {} model and max epochs {}.".format(training_mode, model_name, max_epochs))

    # create the model
    num_classes = len(emotion_table)
    model       = build_model(num_classes, model_name)

    # set the input variables.
    input_var = ct.input((1, model.input_height, model.input_width), np.float32)
    label_var = ct.input((num_classes), np.float32)
    
    # read FER+ dataset.
    logging.info("Loading data...")
    train_params        = FERPlusParameters(num_classes, model.input_height, model.input_width, training_mode, False)
    test_and_val_params = FERPlusParameters(num_classes, model.input_height, model.input_width, "majority", True)

    train_data_reader   = FERPlusReader.create(base_folder, train_folders, "label.csv", train_params)
    val_data_reader     = FERPlusReader.create(base_folder, valid_folders, "label.csv", test_and_val_params)
    test_data_reader    = FERPlusReader.create(base_folder, test_folders, "label.csv", test_and_val_params)
    
    # print summary of the data.
    display_summary(train_data_reader, val_data_reader, test_data_reader)
    
    # get the probabilistic output of the model.
    z    = model.model((input_var - 127.5)/127.5)
    pred = ct.softmax(z)
    
    epoch_size     = train_data_reader.size()
    minibatch_size = 32

    # Training config
    lr_per_minibatch       = [model.learning_rate]*20 + [model.learning_rate / 2.0]*20 + [model.learning_rate / 10.0]
    mm_time_constant       = -minibatch_size/np.log(0.9)
    lr_schedule            = ct.learning_rate_schedule(lr_per_minibatch, unit=ct.UnitType.minibatch, epoch_size=epoch_size)
    mm_schedule            = ct.momentum_as_time_constant_schedule(mm_time_constant)

    # loss and error cost
    train_loss = cost_func(training_mode, pred, label_var)
    pe         = ct.classification_error(z, label_var)

    # construct the trainer
    learner = ct.momentum_sgd(z.parameters, lr_schedule, mm_schedule)
    trainer = ct.Trainer(z, (train_loss, pe), learner)

    # Get minibatches of images to train with and perform model training
    max_val_accuracy    = 0.0
    final_test_accuracy = 0.0
    best_test_accuracy  = 0.0

    logging.info("Start training...")
    epoch      = 0
    best_epoch = 0
    while epoch < max_epochs: 
        train_data_reader.reset()
        val_data_reader.reset()
        test_data_reader.reset()
        
        # Training 
        start_time = time.time()
        training_loss = 0
        training_accuracy = 0
        while train_data_reader.has_more():
            images, labels, current_batch_size = train_data_reader.next_minibatch(minibatch_size)

            # Specify the mapping of input variables in the model to actual minibatch data to be trained with
            trainer.train_minibatch({input_var : images, label_var : labels})

            # keep track of statistics.
            training_loss     += trainer.previous_minibatch_loss_average * current_batch_size
            training_accuracy += trainer.previous_minibatch_evaluation_average * current_batch_size
                
        training_accuracy /= train_data_reader.size()
        training_accuracy = 1.0 - training_accuracy
        
        # Validation
        val_accuracy = 0
        while val_data_reader.has_more():
            images, labels, current_batch_size = val_data_reader.next_minibatch(minibatch_size)
            val_accuracy += trainer.test_minibatch({input_var : images, label_var : labels}) * current_batch_size
            
        val_accuracy /= val_data_reader.size()
        val_accuracy = 1.0 - val_accuracy
        
        # if validation accuracy goes higher, we compute test accuracy
        test_run = False
        if val_accuracy > max_val_accuracy:
            best_epoch = epoch
            max_val_accuracy = val_accuracy

            trainer.save_checkpoint(os.path.join(output_model_folder, "model_{}".format(best_epoch)))

            test_run = True
            test_accuracy = 0
            while test_data_reader.has_more():
                images, labels, current_batch_size = test_data_reader.next_minibatch(minibatch_size)
                test_accuracy += trainer.test_minibatch({input_var : images, label_var : labels}) * current_batch_size
            
            test_accuracy /= test_data_reader.size()
            test_accuracy = 1.0 - test_accuracy
            final_test_accuracy = test_accuracy
            if final_test_accuracy > best_test_accuracy: 
                best_test_accuracy = final_test_accuracy
 
        logging.info("Epoch {}: took {:.3f}s".format(epoch, time.time() - start_time))
        logging.info("  training loss:\t{:e}".format(training_loss))
        logging.info("  training accuracy:\t\t{:.2f} %".format(training_accuracy * 100))
        logging.info("  validation accuracy:\t\t{:.2f} %".format(val_accuracy * 100))
        if test_run:
            logging.info("  test accuracy:\t\t{:.2f} %".format(test_accuracy * 100))
            
        epoch += 1

    logging.info("")
    logging.info("Best validation accuracy:\t\t{:.2f} %, epoch {}".format(max_val_accuracy * 100, best_epoch))
    logging.info("Test accuracy corresponding to best validation:\t\t{:.2f} %".format(final_test_accuracy * 100))
    logging.info("Best test accuracy:\t\t{:.2f} %".format(best_test_accuracy * 100))

    pred.save('ferplus.onnx', ct.ModelFormat.ONNX)
Code Example #27
def train(train_x, train_y, seed, model_dir, loss_dir):
    input_dim = 600
    output_dim = 3631
    num_epochs = 100
    hidden_layer_type = ['TANH', 'TANH']
    hidden_layer_size = [1024, 1024]
    momentum = 0.9
    finetune_lr = 0.01
    l2_regularization_weight = 0.00001
    C.cntk_py.set_fixed_random_seed(seed)
    print('Creating DNN model...')
    input = C.input_variable(input_dim)
    output = C.input_variable(output_dim)
    dnn_model = create_dnn_model(input, hidden_layer_type, hidden_layer_size,
                                 output_dim)
    epoch_num = 0
    current_finetune_lr = finetune_lr
    current_momentum = momentum
    train_loss_output = []
    print('Learning...')
    while (epoch_num < num_epochs):
        print('started epoch %i' % epoch_num)
        epoch_num += 1
        sub_start_time = time.time()
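        # The learner and trainer are rebuilt every epoch so that changes to
        # current_finetune_lr/current_momentum would take effect; note that this
        # also resets the learner's momentum state at each epoch boundary.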
        lr_schedule = C.learning_rate_schedule(current_finetune_lr,
                                               C.UnitType.minibatch)
        momentum_schedule = C.momentum_schedule(current_momentum)
        learner = C.momentum_sgd(
            dnn_model.parameters,
            lr_schedule,
            momentum_schedule,
            unit_gain=False,
            l1_regularization_weight=0,
            l2_regularization_weight=l2_regularization_weight)
        #learner = C.adadelta(dnn_model.parameters, lr_schedule, rho=0.95, epsilon=1e-8, l1_regularization_weight=0,
        #                    l2_regularization_weight= 0.00001 )
        loss = C.cross_entropy_with_softmax(dnn_model, output)
        error = loss
        trainer = C.Trainer(dnn_model, (loss, error), [learner])
        train_error = []
        for i in range(len(train_x)):
            temp_train_x = np.float32(train_x[i])
            temp_train_y = np.float32(train_y[i])
            trainer.train_minibatch({
                input: temp_train_x,
                output: temp_train_y
            })
            train_error.append(trainer.previous_minibatch_loss_average)
        this_train_loss = np.mean(train_error)
        sub_end_time = time.time()
        print('time for 1 epoch is %.1f' % (sub_end_time - sub_start_time))
        train_loss_output.append(this_train_loss)
        print('loss is %.4f' % this_train_loss)
        if np.remainder(epoch_num, 10) == 0:
            nnets_file_name = 'dnn_model_ep' + np.str(epoch_num) + '.model'
            if not os.path.isdir(model_dir):
                os.makedirs(model_dir)
            dnn_model.save(os.path.join(model_dir, nnets_file_name))
            if not os.path.isdir(loss_dir):
                os.makedirs(loss_dir)
            np.savetxt(
                os.path.join(loss_dir,
                             'loss_curve_ep' + np.str(epoch_num) + '.csv'),
                train_loss_output)
    nnets_file_name = 'dnn_model_final.model'
    if not os.path.isdir(model_dir):
        os.makedirs(model_dir)
    dnn_model.save(os.path.join(model_dir, nnets_file_name))
    if not os.path.isdir(loss_dir):
        os.makedirs(loss_dir)
    np.savetxt(
        os.path.join(loss_dir,
                     'loss_curve_final' + np.str(epoch_num) + '.csv'),
        train_loss_output)
Code Example #28
def test_lattice_deserializer(device_id):
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    try_set_default_device(cntk_device(device_id))

    data_dir = ''
    if 'CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY' in os.environ:
        data_dir = os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY']
    else:
        print('CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY environment variable is not defined')

    print(data_dir)
    data_dir = os.path.join(data_dir, "Speech", "AN4Corpus", "v0")
    os.chdir(data_dir)
    feature_dimension = 33
    feature = C.sequence.input_variable(feature_dimension)

    label_dimension = 133
    label = C.sequence.input_variable(label_dimension)

    axis_lattice = C.Axis.new_unique_dynamic_axis('lattice_axis')
    lattice = C.sequence.input_variable(1, sequence_axis=axis_lattice)

    train_feature_filepath = os.path.join(data_dir,"glob_0000.scp")
    train_label_filepath = os.path.join(data_dir,"glob_0000.mlf")
    train_lattice_index_path = os.path.join(data_dir,"latticeIndex.txt")
    mapping_filepath = os.path.join(data_dir,"state.list")
    train_feature_stream = C.io.HTKFeatureDeserializer(
    C.io.StreamDefs(speech_feature = C.io.StreamDef(shape = feature_dimension, scp = train_feature_filepath)))
    train_label_stream = C.io.HTKMLFDeserializer(
    mapping_filepath, C.io.StreamDefs(speech_label = C.io.StreamDef(shape = label_dimension, mlf = train_label_filepath)), True)
    train_lattice_stream = C.io.LatticeDeserializer(train_lattice_index_path,C.io.StreamDefs(speech_lattice = C.io.StreamDef()))
    train_data_reader = C.io.MinibatchSource([train_feature_stream, train_label_stream, train_lattice_stream], frame_mode = False)
    train_input_map = {feature: train_data_reader.streams.speech_feature, label: train_data_reader.streams.speech_label, lattice: train_data_reader.streams.speech_lattice}

    feature_mean = np.fromfile(os.path.join("GlobalStats", "mean.363"), dtype=float, count=feature_dimension)
    feature_inverse_stddev = np.fromfile(os.path.join("GlobalStats", "var.363"), dtype=float, count=feature_dimension)

    feature_normalized = (feature - feature_mean) * feature_inverse_stddev

    with C.default_options(activation=C.sigmoid):
        z = C.layers.Sequential([
            C.layers.For(range(3), lambda: C.layers.Recurrence(C.layers.LSTM(1024))),
            C.layers.Dense(label_dimension)
        ])(feature_normalized)
    mbsize = 1024
    mbs_per_epoch = 10
    max_epochs = 2

    symListPath = os.path.join(data_dir,"CY2SCH010061231_1369712653.numden.lats.symlist")
    phonePath = os.path.join(data_dir,"model.overalltying")
    stateListPath = os.path.join(data_dir,"state.list")
    transProbPath = os.path.join(data_dir,"model.transprob")

    criteria = C.lattice_sequence_with_softmax(label, z, z, lattice, symListPath, phonePath, stateListPath, transProbPath)
    err = C.classification_error(label,z)
    lr = C.learning_parameter_schedule_per_sample([(3, .01), (1,.001)])
    mm = C.momentum_schedule([(1000, 0.9), (0, 0.99)], mbsize)
    learner = C.momentum_sgd(z.parameters, lr, mm)
    trainer = C.Trainer(z, (criteria, err), learner)

    C.logging.log_number_of_parameters(z)
    progress_printer = C.logging.progress_print.ProgressPrinter(tag='Training', num_epochs = max_epochs)


    for epoch in range(max_epochs):
        for mb in range(mbs_per_epoch):
            minibatch = train_data_reader.next_minibatch(mbsize, input_map = train_input_map)
            trainer.train_minibatch(minibatch)
            progress_printer.update_with_trainer(trainer, with_metric = True)

        progress_printer.epoch_summary(with_metric = True)

    assert np.allclose(trainer.previous_minibatch_evaluation_average, 0.15064, atol=TOLERANCE_ABSOLUTE)
    assert np.allclose(trainer.previous_minibatch_loss_average, 0.035923, atol=TOLERANCE_ABSOLUTE)
    assert (trainer.previous_minibatch_sample_count == 218)
    assert (trainer.total_number_of_samples_seen == 5750)
    print("Completed successfully.")
Code Example #29
File: Conv3D_UCF11.py Project: OlegBoulanov/CNTK
def conv3d_ucf11(train_reader, test_reader, max_epochs=30):
    # Replace 0 with 1 to get detailed log.
    set_computation_network_trace_level(0)

    # These values must match for both train and test reader.
    image_height       = train_reader.height
    image_width        = train_reader.width
    num_channels       = train_reader.channel_count
    sequence_length    = train_reader.sequence_length
    num_output_classes = train_reader.label_count

    # Input variables denoting the features and label data
    input_var = C.input_variable((num_channels, sequence_length, image_height, image_width), np.float32)
    label_var = C.input_variable(num_output_classes, np.float32)

    # Instantiate simple 3D Convolution network inspired by VGG network 
    # and http://vlg.cs.dartmouth.edu/c3d/c3d_video.pdf
    with C.default_options (activation=C.relu):
        z = C.layers.Sequential([
            C.layers.Convolution3D((3,3,3), 64, pad=True),
            C.layers.MaxPooling((1,2,2), (1,2,2)),
            C.layers.For(range(3), lambda i: [
                C.layers.Convolution3D((3,3,3), [96, 128, 128][i], pad=True),
                C.layers.Convolution3D((3,3,3), [96, 128, 128][i], pad=True),
                C.layers.MaxPooling((2,2,2), (2,2,2))
            ]),
            C.layers.For(range(2), lambda : [
                C.layers.Dense(1024), 
                C.layers.Dropout(0.5)
            ]),
            C.layers.Dense(num_output_classes, activation=None)
        ])(input_var)
    
    # loss and classification error.
    ce = C.cross_entropy_with_softmax(z, label_var)
    pe = C.classification_error(z, label_var)

    # training config
    train_epoch_size     = train_reader.size()
    train_minibatch_size = 2

    # Set learning parameters
    lr_per_sample          = [0.01]*10+[0.001]*10+[0.0001]
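    # each entry applies for one epoch (epoch_size samples): 0.01 for epochs 1-10, 0.001 for 11-20, 0.0001 afterwards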
    lr_schedule            = C.learning_rate_schedule(lr_per_sample, epoch_size=train_epoch_size, unit=C.UnitType.sample)
    momentum_time_constant = 4096
    mm_schedule            = C.momentum_as_time_constant_schedule([momentum_time_constant])

    # Instantiate the trainer object to drive the model training
    learner = C.momentum_sgd(z.parameters, lr_schedule, mm_schedule, True)
    progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)
    trainer = C.Trainer(z, (ce, pe), learner, progress_printer)

    log_number_of_parameters(z) ; print()

    # Get minibatches of images to train with and perform model training
    for epoch in range(max_epochs):       # loop over epochs
        train_reader.reset()

        while train_reader.has_more():
            videos, labels, current_minibatch = train_reader.next_minibatch(train_minibatch_size)
            trainer.train_minibatch({input_var : videos, label_var : labels})

        trainer.summarize_training_progress()

    # Test data for trained model
    epoch_size     = test_reader.size()
    test_minibatch_size = 2

    # process minibatches and evaluate the model
    metric_numer    = 0
    metric_denom    = 0
    minibatch_index = 0

    test_reader.reset()    
    while test_reader.has_more():
        videos, labels, current_minibatch = test_reader.next_minibatch(test_minibatch_size)
        # minibatch data to be trained with
        metric_numer += trainer.test_minibatch({input_var : videos, label_var : labels}) * current_minibatch
        metric_denom += current_minibatch
        # Keep track of the number of samples processed so far.
        minibatch_index += 1

    print("")
    print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format(minibatch_index+1, (metric_numer*100.0)/metric_denom, metric_denom))
    print("")

    return metric_numer/metric_denom
Code Example #30
                                   1, UnitType.minibatch),
                               momentum=C.momentum_schedule(0.9)),
    lambda params: C.nesterov(params,
                              lr=learning_rate_schedule(1, UnitType.minibatch),
                              momentum=C.momentum_schedule(0.9)),
    lambda params: C.rmsprop(params,
                             lr=learning_rate_schedule(1, UnitType.minibatch),
                             gamma=0.1,
                             inc=3.0,
                             dec=0.1,
                             max=np.inf,
                             min=1e-8),
    lambda params: C.sgd(params,
                         lr=learning_rate_schedule(1, UnitType.minibatch)),
    lambda params: C.momentum_sgd(params,
                                  lr=learning_rate_schedule(
                                      1, UnitType.minibatch),
                                  momentum=C.momentum_schedule(0.9))
]


@pytest.mark.parametrize("params, expectation", LR_SCHEDULE_PARAMS)
def test_learning_rate_schedule(params, expectation):
    l = learning_rate_schedule(*params)
    assert [l[i] for i in range(len(expectation))] == expectation


def sweep_based_schedule_fails():
    with pytest.raises(Exception):
        learning_rate_schedule([1], unit=UnitType.sample, epoch_size=0)

Code Example #31
def train_and_evaluate(reader_train, reader_test, max_epochs, model_func):
    # Input variables denoting the features and label data
    input_var = input_variable((num_channels, image_height, image_width))
    label_var = input_variable((num_classes))

    # Normalize the input
    feature_scale = 1.0 / 256.0
    input_var_norm = element_times(feature_scale, input_var)
    
    # apply model to input
    z = model_func(input_var_norm, out_dims=num_classes)

    #
    # Training action
    #

    # loss and metric
    ce = cross_entropy_with_softmax(z, label_var)
    pe = classification_error(z, label_var)

    # training config
    epoch_size     = 20000
    minibatch_size = 64

    # Set training parameters
    lr_per_minibatch       = learning_rate_schedule([0.01]*10 + [0.003]*10 + [0.001],  UnitType.minibatch, epoch_size)
    momentum_time_constant = momentum_as_time_constant_schedule(-minibatch_size/np.log(0.9))
    l2_reg_weight          = 0.001
    
    # trainer object
    progress_printer = ProgressPrinter(0)

    learner     = momentum_sgd(z.parameters, 
                               lr = lr_per_minibatch, momentum = momentum_time_constant, 
                               l2_regularization_weight=l2_reg_weight)
    trainer     = Trainer(z, (ce, pe), [learner], [progress_printer])

    # define mapping from reader streams to network inputs
    input_map = {
        input_var: reader_train.streams.features,
        label_var: reader_train.streams.labels
    }

    log_number_of_parameters(z) ; print()
    #progress_printer = ProgressPrinter(tag='Training')

    # perform model training
    stop_run=False
    batch_index = 0
    plot_data = {'batchindex':[], 'loss':[], 'error':[]}
    for epoch in range(max_epochs):       # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = reader_train.next_minibatch(min(minibatch_size, epoch_size - sample_count), input_map=input_map) # fetch minibatch.
            trainer.train_minibatch(data)                                   # update model with it

            sample_count += data[label_var].num_samples                     # count samples processed so far
            
            # For visualization...            
            plot_data['batchindex'].append(batch_index)
            plot_data['loss'].append(trainer.previous_minibatch_loss_average)
            plot_data['error'].append(trainer.previous_minibatch_evaluation_average)
            
            progress_printer.update_with_trainer(trainer, with_metric=True) # log progress
            batch_index += 1
            if trainer.previous_minibatch_evaluation_average < 0.025:
                stop_run=True
                break
        if stop_run:
            break
        progress_printer.epoch_summary(with_metric=True)
        #trainer.save_checkpoint(model_temp_file)
        
    #
    # Evaluation action
    #
    epoch_size     = 6600
    minibatch_size = 32

    # process minibatches and evaluate the model
    metric_numer    = 0
    metric_denom    = 0
    sample_count    = 0
    minibatch_index = 0

    input_map = {
        input_var: reader_test.streams.features,
        label_var: reader_test.streams.labels
    }

    while sample_count < epoch_size:
        current_minibatch = min(minibatch_size, epoch_size - sample_count)

        # Fetch next test min batch.
        data = reader_test.next_minibatch(current_minibatch, input_map=input_map)

        # minibatch data to be trained with
        metric_numer += trainer.test_minibatch(data) * current_minibatch
        metric_denom += current_minibatch

        # Keep track of the number of samples processed so far.
        sample_count += data[label_var].num_samples
        minibatch_index += 1

    print("")
    print("Final Results: Minibatch[1-{}]: errs = {:0.1f}% * {}".format(minibatch_index+1, (metric_numer*100.0)/metric_denom, metric_denom))
    print("")
    
    # Visualize training result:
    window_width            = 32
    loss_cumsum             = np.cumsum(np.insert(plot_data['loss'], 0, 0)) 
    error_cumsum            = np.cumsum(np.insert(plot_data['error'], 0, 0)) 

    # Moving average.
    plot_data['batchindex'] = np.insert(plot_data['batchindex'], 0, 0)[window_width:]
    plot_data['avg_loss']   = (loss_cumsum[window_width:] - loss_cumsum[:-window_width]) / window_width
    plot_data['avg_error']  = (error_cumsum[window_width:] - error_cumsum[:-window_width]) / window_width
    
    plt.figure(1)
    plt.subplot(211)
    plt.plot(plot_data["batchindex"], plot_data["avg_loss"], 'b--')
    plt.xlabel('Minibatch number')
    plt.ylabel('Loss')
    plt.title('Minibatch run vs. Training loss ')
    

    plt.subplot(212)
    plt.plot(plot_data["batchindex"], plot_data["avg_error"], 'r--')
    plt.xlabel('Minibatch number')
    plt.ylabel('Label Prediction Error')
    plt.title('Minibatch run vs. Label Prediction Error ')
    plt.show()
    
    return softmax(z)
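The avg_loss/avg_error curves above are produced with a cumulative-sum windowed average; a minimal, self-contained NumPy sketch of the same trick (illustrative values only):

import numpy as np

window_width = 4
values = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0])

# Prepend a zero so cumsum[i] is the sum of the first i values;
# the difference of two shifted cumsums is then each window's sum.
cumsum = np.cumsum(np.insert(values, 0, 0))
moving_avg = (cumsum[window_width:] - cumsum[:-window_width]) / window_width
print(moving_avg)  # [2.5 3.5 4.5 5.5 6.5]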
Code example #32
0
        print(str_out)
        assert False

if __name__=='__main__':
    in1 = C.input_variable(shape=1)
    labels = C.input_variable(shape=1)
    p1 = parameter(shape=1)
    p2 = parameter(shape=1)
    n = plus(in1, p1, name='n')
    z = plus(n, p2, name='z')
    ce = squared_error(z, labels)

    momentum_schedule = C.momentum_schedule_per_sample(0.9990913221888589)
    lr_per_sample = C.learning_parameter_schedule_per_sample(0.007)
    dist_learners = [
        C.distributed.data_parallel_distributed_learner(C.momentum_sgd([p1], lr_per_sample, momentum_schedule, True)),
        C.distributed.data_parallel_distributed_learner(C.momentum_sgd([p2], lr_per_sample, momentum_schedule, True))
    ]

    trainer = C.Trainer(z, ce, dist_learners)
    in1_value = [[1]]
    label_value = [[0]]
    arguments = {in1: in1_value, labels: label_value}
    z_output = z.output

    def check_samples(learners, expected_number_of_samples):
        for learner in learners:
            if learner.total_number_of_samples_seen != expected_number_of_samples:
                print("Completed with exception.")
                raise ValueError("%d samples expected, got %d" % (expected_number_of_samples, learner.total_number_of_samples_seen))
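For context, the total_number_of_samples_seen counter checked above is exposed by every learner, not just the distributed wrappers; a minimal single-process sketch (assumes a CNTK install; the tiny model and values are illustrative, not part of the example):

import numpy as np
import cntk as C

x = C.input_variable(shape=1)
y = C.input_variable(shape=1)
p = C.parameter(shape=1)
z = C.plus(x, p, name='z')
loss = C.squared_error(z, y)

learner = C.sgd(z.parameters, lr=C.learning_parameter_schedule(0.1))
trainer = C.Trainer(z, loss, [learner])

xv = np.array([[1.0]], dtype=np.float32)
yv = np.array([[0.0]], dtype=np.float32)
for _ in range(4):
    trainer.train_minibatch({x: xv, y: yv})

print(learner.total_number_of_samples_seen)  # 4: one sample per minibatch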
Code example #33
0
    def __train_cntk(self, path_to_folder: str, model_definition, epochs: int,
                     output_model_path: str, classes, minibatch_size: int):
        import cntk
        from cntk.learners import learning_parameter_schedule
        from cntk.ops import input_variable
        from cntk.io import MinibatchSource, ImageDeserializer, StreamDefs, StreamDef, MinibatchData, UserDeserializer
        import cntk.io.transforms as xforms
        from cntk.layers import default_options, Dense, Sequential, Activation, Embedding, Convolution2D, MaxPooling, Stabilizer, Convolution, Dropout, BatchNormalization
        from cntk.ops.functions import CloneMethod
        from cntk.logging import ProgressPrinter
        from cntk.losses import cross_entropy_with_softmax
        from cntk import classification_error, softmax, relu, ModelFormat, element_times, momentum_schedule, momentum_sgd
        import pandas as pd
        import numpy as np
        import math
        import csv
        from PIL import Image

        path_to_folder = path_to_folder.rstrip('/')

        map_file_train = path_to_folder + "/train_map.txt"
        map_file_test = path_to_folder + "/test_map.txt"
        classes_set = set()
        num_train = 0
        num_test = 0
        num_channels = 3

        class TrackDataset(UserDeserializer):
            def __init__(self, map_file, streams, chunksize=100):
                super(TrackDataset, self).__init__()
                self._batch_size = chunksize
                self.dataframes = pd.read_csv(map_file,
                                              sep='\t',
                                              dtype=str,
                                              header=None,
                                              names=["features", "labels"])
                self._streams = [
                    cntk.io.StreamInformation(s['name'], i, 'dense',
                                              np.float32, s['shape'])
                    for i, s in enumerate(streams)
                ]

                self._num_chunks = int(
                    math.ceil(len(self.dataframes) / chunksize))

            def _scale_image(self, image, width=224, height=168):
                try:
                    return image.resize((width, height), Image.LINEAR)
                except Exception:
                    raise Exception('scale_image error')

            def stream_infos(self):
                return self._streams

            def num_chunks(self):
                return self._num_chunks

            def get_chunk(self, chunk_id):
                images = []
                labels = []
                maximum = (chunk_id + 1) * self._batch_size
                if (maximum > len(self.dataframes)):
                    maximum = len(self.dataframes)
                for i in range(chunk_id * self._batch_size, maximum):
                    img_name = self.dataframes.iloc[i, 0]
                    image = Image.open(img_name)
                    cl = self.dataframes.iloc[i, 1:].values[0]
                    image = self._scale_image(image)
                    image = np.moveaxis((np.array(image).astype('float32')),
                                        -1, 0)
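                    # Standardize each image: subtract its mean, divide by its std.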
                    image -= np.mean(image, keepdims=True)
                    image /= (np.std(image, keepdims=True) + 1e-6)
                    images.append(image)
                    yv = np.zeros(num_classes)
                    yv[classes.index(cl)] = 1
                    labels.append(yv)

                result = {}
                features = np.array(images)
                lab = np.array(labels).astype('float32')
                result[self._streams[0].m_name] = features
                result[self._streams[1].m_name] = lab
                return result

        try:
            with open(map_file_train) as f:
                csv_reader = csv.reader(f, delimiter='\t')
                for row in csv_reader:
                    cmd = row[1]
                    classes_set.add(cmd)
                    num_train = num_train + 1
        except Exception as e:
            raise Exception(
                "No train_map.txt file found in path " + path_to_folder +
                ". Did you create a dataset using create_balanced_dataset()?")

        num_classes = len(classes)

        with open(map_file_test) as f:
            num_test = sum(1 for _ in f)

        # transforms = [
        #     xforms.scale(width=self.__image_width, height=self.__image_height, channels=num_channels, interpolations='linear'),
        #     xforms.mean(mean_file)
        # ]

        dataset_train = TrackDataset(map_file=map_file_train,
                                     streams=[
                                         dict(name='features',
                                              shape=(num_channels,
                                                     self.__image_height,
                                                     self.__image_width)),
                                         dict(name='labels',
                                              shape=(num_classes, ))
                                     ])
        reader_train = MinibatchSource([dataset_train], randomize=True)

        # a = dataset_train.num_chunks()

        dataset_test = TrackDataset(map_file=map_file_test,
                                    streams=[
                                        dict(name='features',
                                             shape=(num_channels,
                                                    self.__image_height,
                                                    self.__image_width)),
                                        dict(name='labels',
                                             shape=(num_classes, ))
                                    ])
        reader_test = MinibatchSource([dataset_test], randomize=True)

        # ImageDeserializer loads images in the BGR format, not RGB
        # reader_train = MinibatchSource(ImageDeserializer(map_file_train, StreamDefs(
        #     features = StreamDef(field='image', transforms=transforms),
        #     labels   = StreamDef(field='label', shape=num_classes)
        # )))

        # reader_test = MinibatchSource(ImageDeserializer(map_file_test, StreamDefs(
        #     features = StreamDef(field='image', transforms=transforms),
        #     labels   = StreamDef(field='label', shape=num_classes)
        # )))

        # mb = reader_train.next_minibatch(10)

        input_var = input_variable(
            (num_channels, self.__image_height, self.__image_width))
        label_var = input_variable((num_classes))

        model = model_definition(input_var)

        ce = cross_entropy_with_softmax(model, label_var)
        pe = classification_error(model, label_var)

        epoch_size = num_train

        lr_per_minibatch = learning_parameter_schedule([0.01] * 10 +
                                                       [0.003] * 10 + [0.001],
                                                       epoch_size=epoch_size)
        momentums = momentum_schedule(0.9, minibatch_size=minibatch_size)
        l2_reg_weight = 0.001

        learner = momentum_sgd(model.parameters,
                               lr=lr_per_minibatch,
                               momentum=momentums,
                               l2_regularization_weight=l2_reg_weight)
        progress_printer = ProgressPrinter(tag='Training', num_epochs=epochs)
        trainer = cntk.train.Trainer(model, (ce, pe), [learner],
                                     [progress_printer])

        input_map = {
            input_var: reader_train.streams.features,
            label_var: reader_train.streams.labels
        }

        print("Training started")
        batch_index = 0
        plot_data = {'batchindex': [], 'loss': [], 'error': []}
        for epoch in range(epochs):
            sample_count = 0
            while sample_count < epoch_size:
                data: MinibatchSource = reader_train.next_minibatch(
                    min(minibatch_size, epoch_size - sample_count),
                    input_map=input_map)

                trainer.train_minibatch(data)
                sample_count += data[label_var].num_samples

                batch_index += 1
                plot_data['batchindex'].append(batch_index)
                plot_data['loss'].append(
                    trainer.previous_minibatch_loss_average)
                plot_data['error'].append(
                    trainer.previous_minibatch_evaluation_average)

            trainer.summarize_training_progress()

        metric_numer = 0
        metric_denom = 0
        sample_count = 0
        minibatch_index = 0
        epoch_size = num_test

        while sample_count < epoch_size:
            current_minibatch = min(minibatch_size, epoch_size - sample_count)

            data = reader_test.next_minibatch(current_minibatch,
                                              input_map=input_map)

            metric_numer += trainer.test_minibatch(data) * current_minibatch
            metric_denom += current_minibatch

            sample_count += data[label_var].num_samples
            minibatch_index += 1

        print("")
        print("Final Results: Minibatch[1-{}]: errs = {:0.1f}% * {}".format(
            minibatch_index + 1, (metric_numer * 100.0) / metric_denom,
            metric_denom))
        print("")

        model.save(output_model_path, format=ModelFormat.ONNX)
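A stripped-down sketch of the UserDeserializer pattern used by TrackDataset above, feeding a purely in-memory dense stream; ToyDeserializer and the random data are illustrative, not part of the example:

import numpy as np
import cntk as C
from cntk.io import MinibatchSource, UserDeserializer

class ToyDeserializer(UserDeserializer):
    def __init__(self, data, chunk_size=4):
        super(ToyDeserializer, self).__init__()
        self._data = data
        self._chunk_size = chunk_size
        # One dense stream named 'features' with a fixed per-sample shape.
        self._streams = [C.io.StreamInformation('features', 0, 'dense',
                                                np.float32, (data.shape[1],))]

    def stream_infos(self):
        return self._streams

    def num_chunks(self):
        return int(np.ceil(len(self._data) / float(self._chunk_size)))

    def get_chunk(self, chunk_id):
        lo = chunk_id * self._chunk_size
        hi = min(lo + self._chunk_size, len(self._data))
        # A chunk is a dict mapping stream name -> numpy array of samples.
        return {self._streams[0].m_name: self._data[lo:hi]}

data = np.random.rand(10, 3).astype(np.float32)
source = MinibatchSource([ToyDeserializer(data)], randomize=False)
mb = source.next_minibatch(4)
print(mb[source.streams.features].num_samples)  # 4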
Code example #34
0
File: learner_test.py Project: junaidnaseer/CNTK
def test_learner_init():
    i = C.input_variable(shape=(1,), needs_gradient=True, name='a')
    w = parameter(shape=(1,))

    res = i * w

    #test new API: learning_parameter_schedule

    #explicitly specify reference minibatch size and learning rate is in number:
    learner = sgd(res.parameters, lr=0.1, minibatch_size = 25)
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == 25 #the learner's reference minibatch
    #with direct learner learning rate number specification, the learning rate schedule gets the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == 25
    assert learner.learning_rate() == 0.1

    #no explicit specification of reference minibatch size and learning rate is in number:
    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1))
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == C.learners.IGNORE #the learner's reference minibatch
    #with direct learner learning rate number specification, the learning rate schedule gets the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1


    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1, 20), minibatch_size = 25)
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == 25 #the learner's reference minibatch
    #with direct learner learning rate number specification, the learning rate schedule gets the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == 20
    assert learner.learning_rate() == 0.1


    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1, 20))
    assert learner.is_compatible_mode() == False
    #with direct learner learning rate number specification, the learning rate schedule gets the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == 20
    assert learner.learning_rate() == 0.1

    #no explicit specification of reference minibatch size and learning rate is in number:
    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1))
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == C.learners.IGNORE #the learner's reference minibatch
    #with direct learner learning rate number specification, the learning rate schedule gets the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1


    #no explicit specification of reference minibatch size and learning rate is in number:
    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1), minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.minibatch_size == C.learners.IGNORE #the learner's reference minibatch
    #with direct learner learning rate number specification, the learning rate schedule gets the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1


    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1, 20), minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.minibatch_size == C.learners.IGNORE #the learner's reference minibatch
    #with direct learner learning rate number specification, the learning rate schedule gets the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == 20
    assert learner.learning_rate() == 0.1

    #no explicit specification of reference minibatch size and learning rate is in number:
    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1), minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.minibatch_size == C.learners.IGNORE #the learner's reference minibatch
    #with direct learner learning rate number specification, the learning rate schedule gets the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1

    mysgd = C.sgd(parameters=res.parameters, lr=0.4, minibatch_size=32)
    assert mysgd.minibatch_size == 32
    assert mysgd._learning_rate_schedule.minibatch_size == 32
    assert mysgd.learning_rate() == 0.4

    mymomentum = C.momentum_sgd(parameters=res.parameters, lr=0.4, momentum=0.9, minibatch_size=32)
    assert mymomentum.minibatch_size == 32
    assert mymomentum._learning_rate_schedule.minibatch_size == 32
    assert mymomentum.learning_rate() == 0.4

    myadadelta = C.adadelta(parameters=res.parameters, lr=0.4, minibatch_size=32)
    assert myadadelta.minibatch_size == 32
    assert myadadelta._learning_rate_schedule.minibatch_size == 32
    assert myadadelta.learning_rate() == 0.4

    myadam = C.adam(parameters=res.parameters, lr=0.4, momentum=0.9, variance_momentum=0.9, minibatch_size=32)
    assert myadam.minibatch_size == 32
    assert myadam._learning_rate_schedule.minibatch_size == 32
    assert myadam.learning_rate() == 0.4

    myadagrad = C.adagrad(parameters=res.parameters, lr=0.4, minibatch_size=32)
    assert myadagrad.minibatch_size == 32
    assert myadagrad._learning_rate_schedule.minibatch_size == 32
    assert myadagrad.learning_rate() == 0.4

    myfsadagrad = C.fsadagrad(parameters=res.parameters, lr=0.4, momentum=0.9, variance_momentum=0.9,
                              minibatch_size=32)
    assert myfsadagrad.minibatch_size == 32
    assert myfsadagrad._learning_rate_schedule.minibatch_size == 32
    assert myfsadagrad.learning_rate() == 0.4

    mynesterov = C.nesterov(parameters=res.parameters, lr=0.4, momentum=0.9, minibatch_size=32)
    assert mynesterov.minibatch_size == 32
    assert mynesterov._learning_rate_schedule.minibatch_size == 32
    assert mynesterov.learning_rate() == 0.4

    myrmsrop = C.rmsprop(parameters=res.parameters, lr=0.4, gamma=0.5, inc=1.2, dec=0.7, max=10, min=1e-8,
                         minibatch_size=32)
    assert myrmsrop.minibatch_size == 32
    assert myrmsrop._learning_rate_schedule.minibatch_size == 32
    assert myrmsrop.learning_rate() == 0.4

    mysgd = C.sgd(parameters=res.parameters, lr=[0.4, 0.1, 0.001], minibatch_size=32, epoch_size=512)
    assert mysgd.minibatch_size == 32
    assert mysgd._learning_rate_schedule.minibatch_size == 32
    assert mysgd._learning_rate_schedule[0] == 0.4
    assert mysgd._learning_rate_schedule[512] == 0.1
    assert mysgd._learning_rate_schedule[512 * 2] == 0.001

    mymomentum = C.momentum_sgd(parameters=res.parameters, lr=[0.4, 0.1, 0.001], momentum=[0.9],
                                minibatch_size=32, epoch_size=512)
    assert mymomentum.minibatch_size == 32
    assert mymomentum._learning_rate_schedule.minibatch_size == 32
    assert mymomentum._learning_rate_schedule[0] == 0.4
    assert mymomentum._learning_rate_schedule[512] == 0.1
    assert mymomentum._learning_rate_schedule[512 * 2] == 0.001


    myadadelta = C.adadelta(parameters=res.parameters, lr=[0.4, 0.1, 0.001],
                            minibatch_size=32, epoch_size=512)
    assert myadadelta.minibatch_size == 32
    assert myadadelta._learning_rate_schedule.minibatch_size == 32
    assert myadadelta._learning_rate_schedule[0] == 0.4
    assert myadadelta._learning_rate_schedule[512] == 0.1
    assert myadadelta._learning_rate_schedule[512 * 2] == 0.001

    myadam = C.adam(parameters=res.parameters, lr=[0.4, 0.1, 0.001], momentum=[0.9, 0.1, 0.001], variance_momentum=[0.9],
                    minibatch_size=32, epoch_size=512)
    assert myadam.minibatch_size == 32
    assert myadam._learning_rate_schedule.minibatch_size == 32
    assert myadam._learning_rate_schedule[0] == 0.4
    assert myadam._learning_rate_schedule[512] == 0.1
    assert myadam._learning_rate_schedule[512 * 2] == 0.001

    myadagrad = C.adagrad(parameters=res.parameters, lr=[0.4, 0.1, 0.001], minibatch_size=32, epoch_size=512)
    assert myadagrad.minibatch_size == 32
    assert myadagrad._learning_rate_schedule.minibatch_size == 32
    assert myadagrad._learning_rate_schedule[0] == 0.4
    assert myadagrad._learning_rate_schedule[512] == 0.1
    assert myadagrad._learning_rate_schedule[512 * 2] == 0.001

    myfsadagrad = C.fsadagrad(parameters=res.parameters, lr=[0.4, 0.1, 0.001], momentum=[0.9],
                              variance_momentum=[0.9],
                              minibatch_size=32, epoch_size=512)
    assert myfsadagrad.minibatch_size == 32
    assert myfsadagrad._learning_rate_schedule.minibatch_size == 32
    assert myfsadagrad._learning_rate_schedule[0] == 0.4
    assert myfsadagrad._learning_rate_schedule[512] == 0.1
    assert myfsadagrad._learning_rate_schedule[512 * 2] == 0.001

    mynesterov = C.nesterov(parameters=res.parameters, lr=[0.4, 0.1, 0.001], momentum=[0.9],
                            minibatch_size=32, epoch_size=512)
    assert mynesterov.minibatch_size == 32
    assert mynesterov._learning_rate_schedule.minibatch_size == 32
    assert mynesterov._learning_rate_schedule[0] == 0.4
    assert mynesterov._learning_rate_schedule[512] == 0.1
    assert mynesterov._learning_rate_schedule[512 * 2] == 0.001

    myrmsrop = C.rmsprop(parameters=res.parameters, lr=[0.4, 0.1, 0.001], gamma=0.5, inc=1.2, dec=0.7, max=10,
                         min=1e-8,
                         minibatch_size=32, epoch_size=512)
    assert myrmsrop.minibatch_size == 32
    assert myrmsrop._learning_rate_schedule.minibatch_size == 32
    assert myrmsrop._learning_rate_schedule[0] == 0.4
    assert myrmsrop._learning_rate_schedule[512] == 0.1
    assert myrmsrop._learning_rate_schedule[512 * 2] == 0.001

    learner_parameter = learner.parameters
    from cntk.variables import Parameter
    param = learner_parameter[0]
    assert isinstance(param, Parameter)

    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    momentum = C.momentum_schedule(0.999, minibatch_size=1)
    lr_per_sample = learning_parameter_schedule(0.1, minibatch_size = 1)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum, unit_gain_value)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum, unit_gain=unit_gain_value)

    C.set_default_unit_gain_value(False)
    unit_gain_value = C.default_unit_gain_value()
    assert not unit_gain_value

    lr_per_sample = learning_parameter_schedule([0.1, 0.2], minibatch_size = 1)
    C.nesterov(res.parameters, lr=lr_per_sample, momentum=momentum)
    C.nesterov(res.parameters, lr_per_sample, momentum, unit_gain_value)
    C.nesterov(res.parameters, lr=lr_per_sample, momentum=momentum, unit_gain=unit_gain_value)

    lr_per_sample = learning_parameter_schedule([0.1]*3 +[0.2]*2 +[0.3], minibatch_size=1)
    C.adagrad(res.parameters, lr=lr_per_sample, need_ave_multiplier=True)

    C.set_default_unit_gain_value(True)
    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    lr_per_sample = learning_parameter_schedule([(3,0.1), (2, 0.2), (1, 0.3)], minibatch_size=1)
    C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum)
    C.fsadagrad(res.parameters, lr_per_sample, momentum, unit_gain_value)
    C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum, unit_gain=unit_gain_value)

    gamma, inc, dec, max, min = [0.5, 1.2, 0.7, 10, 1e-8]
    lr_per_sample = learning_parameter_schedule([0.1, 0.2], minibatch_size = 1, epoch_size = 100)
    C.rmsprop(res.parameters, lr_per_sample, gamma, inc, dec, max, min, True)

    C.adadelta(res.parameters, lr_per_sample)
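Condensed, the point of the assertions above is that the learner and its learning-rate schedule can each carry their own reference minibatch size; a small illustrative check (not part of the original test):

import cntk as C

w = C.parameter(shape=(1,))

# The schedule keeps the reference minibatch size given to it (20),
# while the learner keeps the one passed directly to it (25).
lr = C.learning_parameter_schedule(0.1, 20)
learner = C.sgd([w], lr=lr, minibatch_size=25)

print(learner.minibatch_size)                          # 25
print(learner._learning_rate_schedule.minibatch_size)  # 20
print(learner.learning_rate())                         # 0.1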
Code example #35
0
        print(str_out)
        assert False

if __name__=='__main__':
    in1 = C.input_variable(shape=1)
    labels = C.input_variable(shape=1)
    p1 = parameter(shape=1)
    p2 = parameter(shape=1)
    n = plus(in1, p1, name='n')
    z = plus(n, p2, name='z')
    ce = squared_error(z, labels)

    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = C.learning_rate_schedule(0.007, C.UnitType.sample)
    dist_learners = [
        C.distributed.data_parallel_distributed_learner(C.momentum_sgd([p1], lr_per_sample, momentum_time_constant, True)),
        C.distributed.data_parallel_distributed_learner(C.momentum_sgd([p2], lr_per_sample, momentum_time_constant, True))
    ]

    trainer = C.Trainer(z, ce, dist_learners)
    in1_value = [[1]]
    label_value = [[0]]
    arguments = {in1: in1_value, labels: label_value}
    z_output = z.output

    def check_samples(learners, expected_number_of_samples):
        for learner in learners:
            if learner.total_number_of_samples_seen != expected_number_of_samples:
                print("Completed with exception.")
                raise ValueError("%d samples expected, got %d" % (expected_number_of_samples, learner.total_number_of_samples_seen))
Code example #36
0
def train(reader_train, reader_test, samples_per_epoch, max_amount_of_epochs,
          samples_per_minibatch, dimensions, classes, learning_rate,
          output_directory, with_tf):
    features = input_variable(shape=(dimensions['depth'], dimensions['height'],
                                     dimensions['width']))
    label = input_variable(shape=len(classes))

    # speeds up training
    normalized_features = element_times(1.0 / 256.0, features)
    if with_tf:
        base_model = {
            'model_file':
            os.path.join("..", "..", "Pretrained Models/ResNet_18.model"),
            'feature_node_name':
            'features',
            'last_hidden_node_name':
            'z.x',
            'image_dims': (3, 224, 224)
        }
        model = create_tf_model(base_model,
                                num_classes=len(classes),
                                input_features=normalized_features,
                                freeze=True)
    else:
        model = create_model(feature_dimensions=normalized_features,
                             classes=classes)

    loss = cross_entropy_with_softmax(model, label)
    metric = classification_error(model, label)
    learner = momentum_sgd(parameters=model.parameters,
                           lr=learning_rate_schedule(learning_rate,
                                                     UnitType.minibatch),
                           momentum=0.9,
                           l2_regularization_weight=0.0005)

    reporter = ProgressPrinter(tag='training', num_epochs=max_amount_of_epochs)

    trainer = Trainer(model=model,
                      criterion=(loss, metric),
                      parameter_learners=[learner],
                      progress_writers=[reporter])

    log_number_of_parameters(model)

    map_input_to_streams_train = {
        features: reader_train.streams.features,
        label: reader_train.streams.labels
    }

    map_input_to_streams_test = {
        features: reader_test.streams.features,
        label: reader_test.streams.labels
    }

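    # Drive the whole loop via training_session: checkpoint once per epoch, evaluate on the test reader at the end.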
    training_session(
        trainer=trainer,
        mb_source=reader_train,
        model_inputs_to_streams=map_input_to_streams_train,
        mb_size=samples_per_minibatch,
        progress_frequency=samples_per_epoch,
        checkpoint_config=CheckpointConfig(frequency=samples_per_epoch,
                                           filename=os.path.join(
                                               output_directory,
                                               "ConvNet_Lego_VisiOn"),
                                           restore=False),
        test_config=TestConfig(
            reader_test,
            minibatch_size=samples_per_minibatch,
            model_inputs_to_streams=map_input_to_streams_test)).train()
    network = {'features': features, 'label': label, 'model': softmax(model)}
    return network
Code example #37
0
def test_learner_init():
    i = C.input_variable(shape=(1, ), needs_gradient=True, name='a')
    w = parameter(shape=(1, ))

    res = i * w

    #test new API: learning_parameter_schedule

    #explicitly specify reference minibatch size and learning rate is in number:
    learner = sgd(res.parameters, lr=0.1, minibatch_size=25)
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == 25  #the learner's reference minibatch
    #with direct learner learning rate number specification, the learning rate schedule gets the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == 25
    assert learner.learning_rate() == 0.1

    #no explicit specification of reference minibatch size and learning rate is in number:
    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1))
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == C.learners.IGNORE  #the learner's reference minibatch
    #with direct learner learning rate number specification, the learning rate schedule gets the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1

    learner = sgd(res.parameters,
                  lr=learning_parameter_schedule(0.1, 20),
                  minibatch_size=25)
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == 25  #the learner's reference minibatch
    #with direct learner learning rate number specification, the learning rate schedule gets the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == 20
    assert learner.learning_rate() == 0.1

    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1, 20))
    assert learner.is_compatible_mode() == False
    #with direct learner learning rate number specification, the learning rate schedule gets the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == 20
    assert learner.learning_rate() == 0.1

    #no explicit specification of reference minibatch size and learning rate is in number:
    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1))
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == C.learners.IGNORE  #the learner's reference minibatch
    #with direct learner learning rate number specification, the learning rate schedule gets the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1

    #no explicit specification of reference minibatch size and learning rate is in number:
    learner = sgd(res.parameters,
                  lr=learning_parameter_schedule(0.1),
                  minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.minibatch_size == C.learners.IGNORE  #the learner's reference minibatch
    #with direct learner learning rate number specification, the learning rate schedule gets the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1

    learner = sgd(res.parameters,
                  lr=learning_parameter_schedule(0.1, 20),
                  minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.minibatch_size == C.learners.IGNORE  #the learner's reference minibatch
    #with direct learner learning rate number specification, the learning rate schedule gets the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == 20
    assert learner.learning_rate() == 0.1

    #no explicit specification of reference minibatch size and learning rate is in number:
    learner = sgd(res.parameters,
                  lr=learning_parameter_schedule(0.1),
                  minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.minibatch_size == C.learners.IGNORE  #the learner's reference minibatch
    #with direct learner learning rate number specification, the learning rate schedule gets the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1

    mysgd = C.sgd(parameters=res.parameters, lr=0.4, minibatch_size=32)
    assert mysgd.minibatch_size == 32
    assert mysgd._learning_rate_schedule.minibatch_size == 32
    assert mysgd.learning_rate() == 0.4

    mymomentum = C.momentum_sgd(parameters=res.parameters,
                                lr=0.4,
                                momentum=0.9,
                                minibatch_size=32)
    assert mymomentum.minibatch_size == 32
    assert mymomentum._learning_rate_schedule.minibatch_size == 32
    assert mymomentum.learning_rate() == 0.4

    myadadelta = C.adadelta(parameters=res.parameters,
                            lr=0.4,
                            minibatch_size=32)
    assert myadadelta.minibatch_size == 32
    assert myadadelta._learning_rate_schedule.minibatch_size == 32
    assert myadadelta.learning_rate() == 0.4

    myadam = C.adam(parameters=res.parameters,
                    lr=0.4,
                    momentum=0.9,
                    variance_momentum=0.9,
                    minibatch_size=32)
    assert myadam.minibatch_size == 32
    assert myadam._learning_rate_schedule.minibatch_size == 32
    assert myadam.learning_rate() == 0.4

    myadagrad = C.adagrad(parameters=res.parameters, lr=0.4, minibatch_size=32)
    assert myadagrad.minibatch_size == 32
    assert myadagrad._learning_rate_schedule.minibatch_size == 32
    assert myadagrad.learning_rate() == 0.4

    myfsadagrad = C.fsadagrad(parameters=res.parameters,
                              lr=0.4,
                              momentum=0.9,
                              variance_momentum=0.9,
                              minibatch_size=32)
    assert myfsadagrad.minibatch_size == 32
    assert myfsadagrad._learning_rate_schedule.minibatch_size == 32
    assert myfsadagrad.learning_rate() == 0.4

    mynesterov = C.nesterov(parameters=res.parameters,
                            lr=0.4,
                            momentum=0.9,
                            minibatch_size=32)
    assert mynesterov.minibatch_size == 32
    assert mynesterov._learning_rate_schedule.minibatch_size == 32
    assert mynesterov.learning_rate() == 0.4

    myrmsrop = C.rmsprop(parameters=res.parameters,
                         lr=0.4,
                         gamma=0.5,
                         inc=1.2,
                         dec=0.7,
                         max=10,
                         min=1e-8,
                         minibatch_size=32)
    assert myrmsrop.minibatch_size == 32
    assert myrmsrop._learning_rate_schedule.minibatch_size == 32
    assert myrmsrop.learning_rate() == 0.4

    mysgd = C.sgd(parameters=res.parameters,
                  lr=[0.4, 0.1, 0.001],
                  minibatch_size=32,
                  epoch_size=512)
    assert mysgd.minibatch_size == 32
    assert mysgd._learning_rate_schedule.minibatch_size == 32
    assert mysgd._learning_rate_schedule[0] == 0.4
    assert mysgd._learning_rate_schedule[512] == 0.1
    assert mysgd._learning_rate_schedule[512 * 2] == 0.001

    mymomentum = C.momentum_sgd(parameters=res.parameters,
                                lr=[0.4, 0.1, 0.001],
                                momentum=[0.9],
                                minibatch_size=32,
                                epoch_size=512)
    assert mymomentum.minibatch_size == 32
    assert mymomentum._learning_rate_schedule.minibatch_size == 32
    assert mymomentum._learning_rate_schedule[0] == 0.4
    assert mymomentum._learning_rate_schedule[512] == 0.1
    assert mymomentum._learning_rate_schedule[512 * 2] == 0.001

    myadadelta = C.adadelta(parameters=res.parameters,
                            lr=[0.4, 0.1, 0.001],
                            minibatch_size=32,
                            epoch_size=512)
    assert myadadelta.minibatch_size == 32
    assert myadadelta._learning_rate_schedule.minibatch_size == 32
    assert myadadelta._learning_rate_schedule[0] == 0.4
    assert myadadelta._learning_rate_schedule[512] == 0.1
    assert myadadelta._learning_rate_schedule[512 * 2] == 0.001

    myadam = C.adam(parameters=res.parameters,
                    lr=[0.4, 0.1, 0.001],
                    momentum=[0.9, 0.1, 0.001],
                    variance_momentum=[0.9],
                    minibatch_size=32,
                    epoch_size=512)
    assert myadam.minibatch_size == 32
    assert myadam._learning_rate_schedule.minibatch_size == 32
    assert myadam._learning_rate_schedule[0] == 0.4
    assert myadam._learning_rate_schedule[512] == 0.1
    assert myadam._learning_rate_schedule[512 * 2] == 0.001

    myadagrad = C.adagrad(parameters=res.parameters,
                          lr=[0.4, 0.1, 0.001],
                          minibatch_size=32,
                          epoch_size=512)
    assert myadagrad.minibatch_size == 32
    assert myadagrad._learning_rate_schedule.minibatch_size == 32
    assert myadagrad._learning_rate_schedule[0] == 0.4
    assert myadagrad._learning_rate_schedule[512] == 0.1
    assert myadagrad._learning_rate_schedule[512 * 2] == 0.001

    myfsadagrad = C.fsadagrad(parameters=res.parameters,
                              lr=[0.4, 0.1, 0.001],
                              momentum=[0.9],
                              variance_momentum=[0.9],
                              minibatch_size=32,
                              epoch_size=512)
    assert myfsadagrad.minibatch_size == 32
    assert myfsadagrad._learning_rate_schedule.minibatch_size == 32
    assert myfsadagrad._learning_rate_schedule[0] == 0.4
    assert myfsadagrad._learning_rate_schedule[512] == 0.1
    assert myfsadagrad._learning_rate_schedule[512 * 2] == 0.001

    mynesterov = C.nesterov(parameters=res.parameters,
                            lr=[0.4, 0.1, 0.001],
                            momentum=[0.9],
                            minibatch_size=32,
                            epoch_size=512)
    assert mynesterov.minibatch_size == 32
    assert mynesterov._learning_rate_schedule.minibatch_size == 32
    assert mynesterov._learning_rate_schedule[0] == 0.4
    assert mynesterov._learning_rate_schedule[512] == 0.1
    assert mynesterov._learning_rate_schedule[512 * 2] == 0.001

    myrmsrop = C.rmsprop(parameters=res.parameters,
                         lr=[0.4, 0.1, 0.001],
                         gamma=0.5,
                         inc=1.2,
                         dec=0.7,
                         max=10,
                         min=1e-8,
                         minibatch_size=32,
                         epoch_size=512)
    assert myrmsrop.minibatch_size == 32
    assert myrmsrop._learning_rate_schedule.minibatch_size == 32
    assert myrmsrop._learning_rate_schedule[0] == 0.4
    assert myrmsrop._learning_rate_schedule[512] == 0.1
    assert myrmsrop._learning_rate_schedule[512 * 2] == 0.001

    learner_parameter = learner.parameters
    from cntk.variables import Parameter
    param = learner_parameter[0]
    assert isinstance(param, Parameter)

    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    momentum = C.momentum_schedule(0.999, minibatch_size=1)
    lr_per_sample = learning_parameter_schedule(0.1, minibatch_size=1)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum, unit_gain_value)
    C.momentum_sgd(res.parameters,
                   lr_per_sample,
                   momentum,
                   unit_gain=unit_gain_value)

    C.set_default_unit_gain_value(False)
    unit_gain_value = C.default_unit_gain_value()
    assert not unit_gain_value

    lr_per_sample = learning_parameter_schedule([0.1, 0.2], minibatch_size=1)
    C.nesterov(res.parameters, lr=lr_per_sample, momentum=momentum)
    C.nesterov(res.parameters, lr_per_sample, momentum, unit_gain_value)
    C.nesterov(res.parameters,
               lr=lr_per_sample,
               momentum=momentum,
               unit_gain=unit_gain_value)

    lr_per_sample = learning_parameter_schedule([0.1] * 3 + [0.2] * 2 + [0.3],
                                                minibatch_size=1)
    C.adagrad(res.parameters, lr=lr_per_sample, need_ave_multiplier=True)

    C.set_default_unit_gain_value(True)
    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    lr_per_sample = learning_parameter_schedule([(3, 0.1), (2, 0.2), (1, 0.3)],
                                                minibatch_size=1)
    C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum)
    C.fsadagrad(res.parameters, lr_per_sample, momentum, unit_gain_value)
    C.fsadagrad(res.parameters,
                lr=lr_per_sample,
                momentum=momentum,
                unit_gain=unit_gain_value)

    gamma, inc, dec, max, min = [0.5, 1.2, 0.7, 10, 1e-8]
    lr_per_sample = learning_parameter_schedule([0.1, 0.2],
                                                minibatch_size=1,
                                                epoch_size=100)
    C.rmsprop(res.parameters, lr_per_sample, gamma, inc, dec, max, min, True)

    C.adadelta(res.parameters, lr_per_sample)
Code example #38
0
def test_learner_init():
    i = C.input_variable(shape=(1,), needs_gradient=True, name='a')
    w = parameter(shape=(1,))

    res = i * w

    learner = sgd(res.parameters, lr=learning_rate_schedule(0.1, UnitType.sample))
    assert learner.learning_rate() == 0.1
    
    learner.reset_learning_rate(learning_rate_schedule([1,2,3], UnitType.minibatch))
    assert learner.learning_rate() == 1.0

    learner_parameter = learner.parameters
    from cntk.variables import Parameter
    param = learner_parameter[0]
    assert isinstance(param, Parameter)

    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = learning_rate_schedule(0.1, UnitType.sample)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum_time_constant)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum_time_constant, unit_gain_value)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum_time_constant, unit_gain=unit_gain_value)

    C.set_default_unit_gain_value(False)
    unit_gain_value = C.default_unit_gain_value()
    assert not unit_gain_value

    lr_per_sample = learning_rate_schedule([0.1, 0.2], UnitType.sample)
    C.nesterov(res.parameters, lr=lr_per_sample, momentum=momentum_time_constant)
    C.nesterov(res.parameters, lr_per_sample, momentum_time_constant, unit_gain_value)
    C.nesterov(res.parameters, lr=lr_per_sample, momentum=momentum_time_constant, unit_gain=unit_gain_value)

    lr_per_sample = learning_rate_schedule([0.1]*3 +[0.2]*2 +[0.3], UnitType.sample)
    C.adagrad(res.parameters, lr=lr_per_sample, need_ave_multiplier=True)

    C.set_default_unit_gain_value(True)
    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    lr_per_sample = learning_rate_schedule([(3,0.1), (2, 0.2), (1, 0.3)], UnitType.sample)
    C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum_time_constant)
    C.fsadagrad(res.parameters, lr_per_sample, momentum_time_constant, unit_gain_value)
    C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum_time_constant, unit_gain=unit_gain_value)

    gamma, inc, dec, max, min = [0.1]*5
    lr_per_sample = learning_rate_schedule([0.1, 0.2], UnitType.sample, 100)
    C.rmsprop(res.parameters, lr_per_sample, gamma, inc, dec, max, min, True)

    C.set_default_use_mean_gradient_value(False)
    use_mean_gradient_value = C.default_use_mean_gradient_value()
    assert not use_mean_gradient_value

    C.adadelta(res.parameters, lr_per_sample)
    
    C.set_default_use_mean_gradient_value(True)
    use_mean_gradient_value = C.default_use_mean_gradient_value()
    assert use_mean_gradient_value

    C.adadelta(res.parameters, lr_per_sample)
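The deprecated UnitType.sample schedules used above correspond to the newer API with an explicit per-sample reference minibatch size; a small sketch of that mapping (illustrative, not part of the test):

import cntk as C

# Legacy, deprecated form: learning rate interpreted per sample.
lr_old = C.learning_rate_schedule(0.1, C.UnitType.sample)

# Newer equivalent: the per-sample reference is stated explicitly.
lr_new = C.learning_parameter_schedule(0.1, minibatch_size=1)

assert lr_old.minibatch_size == lr_new.minibatch_size == 1
assert lr_old[0] == lr_new[0] == 0.1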
Code example #39
0
def train_model(model_details, num_classes, train_map_file, learning_params, max_images=-1):
    num_epochs = learning_params["max_epochs"]
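    # epoch_size: one map-file line per training image.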
    epoch_size = sum(1 for _ in open(train_map_file))
    if max_images > 0:
        epoch_size = min(epoch_size, max_images)
    mini_batch_size = learning_params["mb_size"]

    # Create the minibatch source and input variables
    mini_batch_source = create_mb_source(train_map_file, model_details["image_dims"], num_classes)
    image_input = cntk.input_variable(model_details["image_dims"])
    label_input = cntk.input_variable(num_classes)

    # Define mapping from reader streams to network inputs
    input_map = {
        image_input: mini_batch_source["features"],
        label_input: mini_batch_source["labels"],
    }

    # Instantiate the transfer learning model and loss function
    tl_model = create_model(
        model_details,
        num_classes,
        image_input,
        freeze=learning_params["freeze_weights"],
    )

    ce = cntk.cross_entropy_with_softmax(tl_model, label_input)
    pe = cntk.classification_error(tl_model, label_input)

    # Instantiate the trainer object
    lr_schedule = cntk.learning_parameter_schedule(learning_params["lr_per_mb"])
    mm_schedule = cntk.momentum_schedule(learning_params["momentum_per_mb"])
    learner = cntk.momentum_sgd(
        tl_model.parameters,
        lr_schedule,
        mm_schedule,
        l2_regularization_weight=learning_params["l2_reg_weight"],
    )
    trainer = cntk.Trainer(tl_model, (ce, pe), [learner])

    # Get mini_batches of images and perform model training
    print("Training transfer learning model for {0} epochs (epoch_size = {1}).".format(num_epochs, epoch_size))
    cntk.logging.log_number_of_parameters(tl_model)
    progress_printer = cntk.logging.ProgressPrinter(tag="Training", num_epochs=num_epochs)

    # Loop over epochs
    for epoch in range(num_epochs):
        sample_count = 0
        # Loop over mini_batches in the epoch
        while sample_count < epoch_size:
            data = mini_batch_source.next_minibatch(min(mini_batch_size, epoch_size - sample_count),
                                                    input_map=input_map)

            # Update model with it
            trainer.train_minibatch(data)

            # Count samples processed so far
            sample_count += trainer.previous_minibatch_sample_count
            progress_printer.update_with_trainer(trainer, with_metric=True)

            if sample_count % (100 * mini_batch_size) == 0:
                print("Processed {0} samples".format(sample_count))

        progress_printer.epoch_summary(with_metric=True)
    return tl_model
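For reference, when learning_params["lr_per_mb"] is a list, the resulting schedule steps through its entries; with an explicit epoch_size the boundaries fall at cumulative sample counts, as in this sketch (illustrative values, not the example's actual settings):

import cntk as C

# Three learning-rate phases, each lasting one epoch of 100 samples.
lr = C.learning_parameter_schedule([0.01, 0.003, 0.001], epoch_size=100)
print(lr[0])    # 0.01  -> first 100 samples
print(lr[100])  # 0.003 -> next 100 samples
print(lr[200])  # 0.001 -> from then on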
Code example #40
0
File: learner_test.py Project: junaidnaseer/CNTK
MOMENTUM_SCHEDULE_PARAMS = [
        ((0.2,), [0.2]),
        ((0.2,), [0.2, 0.2, 0.2, 0.2]),
        (([0.2,0.4], 5), [0.2]*5+[0.4]*20),
        (([(3,0.2),(2,0.4),(1,0.8)], 5), [0.2]*15+[0.4]*10+[0.8]*20),
        ]

LEARNER_LAMBDAS = [
    lambda params: C.adadelta(params),
    lambda params: C.adagrad(params, lr=learning_rate_schedule(1, UnitType.minibatch)),
    lambda params: C.adam(params, lr=learning_rate_schedule(1, UnitType.minibatch), momentum=C.momentum_schedule(0.9)),
    lambda params: C.fsadagrad(params, lr=learning_rate_schedule(1, UnitType.minibatch), momentum=C.momentum_schedule(0.9)),
    lambda params: C.nesterov(params, lr=learning_rate_schedule(1, UnitType.minibatch), momentum=C.momentum_schedule(0.9)),
    lambda params: C.rmsprop(params, lr=learning_rate_schedule(1, UnitType.minibatch), gamma=0.1, inc=3.0, dec=0.1, max=np.inf, min=1e-8),
    lambda params: C.sgd(params, lr=learning_rate_schedule(1, UnitType.minibatch)),
    lambda params: C.momentum_sgd(params, lr=learning_rate_schedule(1, UnitType.minibatch), momentum=C.momentum_schedule(0.9))]

@pytest.mark.parametrize("params, expectation, minibatch_size", LR_SCHEDULE_PARAMS_LEGACY)
def test_learning_rate_schedule(params, expectation, minibatch_size):
    l = learning_rate_schedule(*params)
    assert l.minibatch_size == minibatch_size
    assert [l[i] for i in range(len(expectation))] == expectation

@pytest.mark.parametrize("params, expectation, minibatch_size", LR_SCHEDULE_PARAMS)
def test_learning_parameter_schedule(params, expectation, minibatch_size):
    l = learning_parameter_schedule(*params)
    assert l.minibatch_size == minibatch_size
    assert [l[i] for i in range(len(expectation))] == expectation


def sweep_based_schedule_fails():
Code example #41
0
# training config
epoch_size = 6600  #12000 #15000
minibatch_size = 64

# Set training parameters
lr_per_minibatch = learning_rate_schedule([0.01] * 10 + [0.003] * 10 + [0.001],
                                          UnitType.minibatch, epoch_size)
momentum_time_constant = momentum_as_time_constant_schedule(-minibatch_size /
                                                            np.log(0.9))
l2_reg_weight = 0.001

# trainer object
progress_printer = ProgressPrinter(0)

learner = momentum_sgd(z.parameters,
                       lr=lr_per_minibatch,
                       momentum=momentum_time_constant,
                       l2_regularization_weight=l2_reg_weight)

# =============================================================================
# Create or RESTORE trainer
# =============================================================================
trainer = Trainer(z, (ce, pe), [learner], [progress_printer])
# trainer.restore_from_checkpoint(model_temp_file)

# define mapping from reader streams to network inputs
input_map = {
    input_var: reader_train.streams.features,
    label_var: reader_train.streams.labels
}

#progress_printer = ProgressPrinter(tag='Training')
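The -minibatch_size / np.log(0.9) expression above picks the time constant whose per-minibatch decay equals 0.9 (momentum per minibatch = exp(-minibatch_size / time_constant)); a quick numeric check (illustrative):

import numpy as np
import cntk as C

minibatch_size = 64
target_momentum_per_mb = 0.9

# Solve exp(-minibatch_size / tc) = 0.9 for the time constant tc.
time_constant = -minibatch_size / np.log(target_momentum_per_mb)
print(round(time_constant, 1))                  # ~607.4
print(np.exp(-minibatch_size / time_constant))  # 0.9 recovered

momentum = C.momentum_as_time_constant_schedule(time_constant)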
Code example #42
0
File: learner_test.py Project: junaidnaseer/CNTK
def test_learner_init_legacy():
    i = C.input_variable(shape=(1,), needs_gradient=True, name='a')
    w = parameter(shape=(1,))

    res = i * w

    # for backcompatibility test
    # this will be deprecated in future version
    learner = sgd(res.parameters, lr=learning_rate_schedule(0.1, UnitType.sample))
    assert learner._learning_rate_schedule.minibatch_size == 1  # the deprecated per sample schedule should not use compatible mode
    assert learner.learning_rate() == 0.1

    # for backcompatibility test
    # this will be deprecated in future version
    # The UnitType will provide per minibatch instruction for the learner
    # this will be deprecated in future version
    learner = sgd(res.parameters, lr=learning_rate_schedule(0.1, UnitType.minibatch))
    assert learner.is_compatible_mode() == False
    assert learner.learning_rate() == 0.1
    assert learner.minibatch_size == C.learners.IGNORE
    assert learner._learning_rate_schedule.minibatch_size == 0

    # for backcompatibility test, in reset learning rate, the learner won't receive the reference minibatch size from the schedule
    # user will need to specify the reference minibatch size explicitly
    # this will be deprecated in future version
    learner = sgd(res.parameters, lr=0.1)
    learner.reset_learning_rate(learning_rate_schedule([1, 2, 3], UnitType.minibatch))
    assert learner.learning_rate() == 1.0
    learner.minibatch_size = C.learners.IGNORE  # reset to be per minibatch
    assert learner.minibatch_size == C.learners.IGNORE
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.is_compatible_mode() == True

    # for backcompatibility test
    # this will be deprecated in future version
    learner = sgd(res.parameters, lr=learning_rate_schedule(0.1, UnitType.sample), minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.learning_rate() == 0.1
    assert learner.minibatch_size == C.learners.IGNORE  # the learner's reference minibatch size is still 0

    # this will be deprecated in future version: This is a logically invalid combination, but it was the only way to use the mean gradient and set the learning rate in the past.
    learner = sgd(res.parameters, lr=learning_rate_schedule(0.1, UnitType.sample), use_mean_gradient=True)
    assert learner.is_compatible_mode() == True
    assert learner.learning_rate() == 0.1
    #test the override in the new version
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.minibatch_size == C.learners.IGNORE  # the learner's reference minibatch size is still 0


    # for backcompatibility test
    # this will be deprecated in future version
    # The UnitType will provide per minibatch instruction for the learner
    # this will be deprecated in future version
    learner = sgd(res.parameters, lr=learning_rate_schedule(0.1, UnitType.minibatch), minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.learning_rate() == 0.1
    assert learner.minibatch_size == C.learners.IGNORE
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE

    # for backcompatibility test, in reset learning rate, the learner won't receive the reference minibatch size from the schedule
    # user will need to specify the reference minibatch size explicitly
    # this will be deprecated in future version
    learner = sgd(res.parameters, lr=0.1)
    learner.reset_learning_rate(learning_rate_schedule([1, 2, 3], UnitType.minibatch))
    assert learner.learning_rate() == 1.0
    learner.minibatch_size = C.learners.IGNORE  # reset to be per minibatch
    assert learner.minibatch_size == C.learners.IGNORE
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.is_compatible_mode() == True

    learner_parameter = learner.parameters
    from cntk.variables import Parameter
    param = learner_parameter[0]
    assert isinstance(param, Parameter)

    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    # back compatible API test
    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = learning_parameter_schedule(0.1, minibatch_size=1)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum_time_constant)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum_time_constant, unit_gain_value)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum_time_constant, unit_gain=unit_gain_value)

    C.set_default_unit_gain_value(False)
    unit_gain_value = C.default_unit_gain_value()
    assert not unit_gain_value

    C.set_default_unit_gain_value(True)
    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    lr_per_sample = learning_rate_schedule([(3, 0.1), (2, 0.2), (1, 0.3)], unit=UnitType.sample)
    C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum_time_constant)
    C.fsadagrad(res.parameters, lr_per_sample, momentum_time_constant, unit_gain_value)
    C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum_time_constant, unit_gain=unit_gain_value)

    gamma, inc, dec, max, min = [0.5, 1.2, 0.7, 10, 1e-8]
    lr_per_sample = learning_rate_schedule([0.1, 0.2], unit=UnitType.sample, epoch_size=100)
    C.rmsprop(res.parameters, lr_per_sample, gamma, inc, dec, max, min, True)

    C.adadelta(res.parameters, lr_per_sample, use_mean_gradient=True)
Code example #43
0
File: learner_test.py Project: AllanYiin/CNTK
MOMENTUM_SCHEDULE_PARAMS = [
        ((0.2,), [0.2]),
        ((0.2,), [0.2, 0.2, 0.2, 0.2]),
        (([0.2,0.4], 5), [0.2]*5+[0.4]*20),
        (([(3,0.2),(2,0.4),(1,0.8)], 5), [0.2]*15+[0.4]*10+[0.8]*20),
        ]

LEARNER_LAMBDAS = [
    lambda params: C.adadelta(params),
    lambda params: C.adagrad(params, lr=learning_parameter_schedule(1)),
    lambda params: C.adam(params, lr=learning_parameter_schedule(1), momentum=C.momentum_schedule(0.9)),
    lambda params: C.fsadagrad(params, lr=learning_parameter_schedule(1), momentum=C.momentum_schedule(0.9)),
    lambda params: C.nesterov(params, lr=learning_parameter_schedule(1), momentum=C.momentum_schedule(0.9)),
    lambda params: C.rmsprop(params, lr=learning_parameter_schedule(1), gamma=0.1, inc=3.0, dec=0.1, max=np.inf, min=1e-8),
    lambda params: C.sgd(params, lr=learning_parameter_schedule(1)),
    lambda params: C.momentum_sgd(params, lr=learning_parameter_schedule(1), momentum=C.momentum_schedule(0.9))]

@pytest.mark.parametrize("params, expectation, minibatch_size", LR_SCHEDULE_PARAMS_LEGACY)
def test_learning_rate_schedule(params, expectation, minibatch_size):
    l = learning_rate_schedule(*params)
    assert l.minibatch_size == minibatch_size
    assert [l[i] for i in range(len(expectation))] == expectation

@pytest.mark.parametrize("params, expectation, minibatch_size", LR_SCHEDULE_PARAMS)
def test_learning_parameter_schedule(params, expectation, minibatch_size):
    l = learning_parameter_schedule(*params)
    assert l.minibatch_size == minibatch_size
    assert [l[i] for i in range(len(expectation))] == expectation


def sweep_based_schedule_fails():
Code example #44
0
def test_learner_init_legacy():
    i = C.input_variable(shape=(1, ), needs_gradient=True, name='a')
    w = parameter(shape=(1, ))

    res = i * w

    # backward-compatibility test
    # this will be deprecated in a future version
    learner = sgd(res.parameters,
                  lr=learning_rate_schedule(0.1, UnitType.sample))
    assert learner._learning_rate_schedule.minibatch_size == 1  # the deprecated per sample schedule should not use compatible mode
    assert learner.learning_rate() == 0.1

    # backward-compatibility test
    # this will be deprecated in a future version
    # The UnitType will provide per-minibatch instruction for the learner
    learner = sgd(res.parameters,
                  lr=learning_rate_schedule(0.1, UnitType.minibatch))
    assert learner.is_compatible_mode() == False
    assert learner.learning_rate() == 0.1
    assert learner.minibatch_size == C.learners.IGNORE
    assert learner._learning_rate_schedule.minibatch_size == 0

    # backward-compatibility test
    # this will be deprecated in a future version
    learner = sgd(res.parameters,
                  lr=learning_rate_schedule(0.1, UnitType.sample),
                  minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.learning_rate() == 0.1
    assert learner.minibatch_size == C.learners.IGNORE  # the learner's reference minibatch size is still 0

    # this will be deprecated in a future version: this is a logically invalid combination, but it was the only way to use the mean gradient and set the learning rate in the past.
    learner = sgd(res.parameters,
                  lr=learning_rate_schedule(0.1, UnitType.sample),
                  use_mean_gradient=True)
    assert learner.is_compatible_mode() == True
    assert learner.learning_rate() == 0.1
    # test the override in the new version
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.minibatch_size == C.learners.IGNORE  # the learner's reference minibatch size is still 0

    # backward-compatibility test
    # this will be deprecated in a future version
    # The UnitType will provide per-minibatch instruction for the learner
    learner = sgd(res.parameters,
                  lr=learning_rate_schedule(0.1, UnitType.minibatch),
                  minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.learning_rate() == 0.1
    assert learner.minibatch_size == C.learners.IGNORE
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE

    # backward-compatibility test: when the learning rate is reset, the learner does not receive the reference minibatch size from the schedule;
    # the user must specify the reference minibatch size explicitly
    # this will be deprecated in a future version
    learner = sgd(res.parameters, lr=0.1)
    learner.reset_learning_rate(
        learning_rate_schedule([1, 2, 3], UnitType.minibatch))
    assert learner.learning_rate() == 1.0
    learner.minibatch_size = C.learners.IGNORE  # reset to be per minibatch
    assert learner.minibatch_size == C.learners.IGNORE
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.is_compatible_mode() == True

    learner_parameter = learner.parameters
    from cntk.variables import Parameter
    param = learner_parameter[0]
    assert isinstance(param, Parameter)

    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    # backward-compatible API test
    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = learning_parameter_schedule(0.1, minibatch_size=1)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum_time_constant)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum_time_constant,
                   unit_gain_value)
    C.momentum_sgd(res.parameters,
                   lr_per_sample,
                   momentum_time_constant,
                   unit_gain=unit_gain_value)

    C.set_default_unit_gain_value(False)
    unit_gain_value = C.default_unit_gain_value()
    assert not unit_gain_value

    C.set_default_unit_gain_value(True)
    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    lr_per_sample = learning_rate_schedule([(3, 0.1), (2, 0.2), (1, 0.3)],
                                           unit=UnitType.sample)
    C.fsadagrad(res.parameters,
                lr=lr_per_sample,
                momentum=momentum_time_constant)
    C.fsadagrad(res.parameters, lr_per_sample, momentum_time_constant,
                unit_gain_value)
    C.fsadagrad(res.parameters,
                lr=lr_per_sample,
                momentum=momentum_time_constant,
                unit_gain=unit_gain_value)

    gamma, inc, dec, max, min = [0.5, 1.2, 0.7, 10, 1e-8]
    lr_per_sample = learning_rate_schedule([0.1, 0.2],
                                           unit=UnitType.sample,
                                           epoch_size=100)
    C.rmsprop(res.parameters, lr_per_sample, gamma, inc, dec, max, min, True)

    C.adadelta(res.parameters, lr_per_sample, use_mean_gradient=True)
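
For reference, the legacy constructs exercised above map onto the newer schedule API roughly as follows (a sketch assuming CNTK 2.2+; the equivalences are illustrative and are not taken from the test file):

def _legacy_to_new_schedule_sketch(params):
    # legacy: learning_rate_schedule(0.1, UnitType.sample)
    lr_per_sample = learning_parameter_schedule(0.1, minibatch_size=1)

    # legacy: learning_rate_schedule(0.1, UnitType.minibatch)
    lr_as_is = learning_parameter_schedule(0.1, minibatch_size=C.learners.IGNORE)

    # `params` is any parameter list, e.g. res.parameters from the test above;
    # the learners pick up the schedule's reference minibatch size
    return C.sgd(params, lr=lr_per_sample), C.sgd(params, lr=lr_as_is)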