Example #1
def create_trainer(network, epoch_size, num_quantization_bits, warm_up, progress_writers):
    ''' Create Trainer '''
    print('Creating the trainer.')
    # Learning rate and momentum schedules
    lr_schedule = C.learning_rate_schedule([2.5], unit=C.UnitType.minibatch)
    mm_schedule = C.momentum_schedule(0.9)
    l2_reg_weight = 0.001

    # Create the Adam learners
    learner = C.adam(network['output'].parameters,
                     lr_schedule,
                     mm_schedule,
                     l2_regularization_weight=l2_reg_weight,
                     unit_gain=False)

    # Compute the number of workers
    num_workers = C.distributed.Communicator.num_workers()
    print('Number of workers: {}'.format(num_workers))
    if num_workers > 1:
        parameter_learner = C.train.distributed.data_parallel_distributed_learner(learner, num_quantization_bits=num_quantization_bits)
        trainer = C.Trainer(network['output'], (network['ce'], network['pe']), parameter_learner, progress_writers)
    else:
        trainer = C.Trainer(network['output'], (network['ce'], network['pe']), learner, progress_writers)

    return trainer
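
A sketch of how a trainer built this way is typically driven and shut down on each worker; the `reader`, `input_map`, and minibatch sizes below are assumptions, not part of the example:

import cntk as C

def train_and_finalize(trainer, reader, input_map,
                       minibatch_size=64, max_samples=100000):
    samples_seen = 0
    while samples_seen < max_samples:
        data = reader.next_minibatch(minibatch_size, input_map=input_map)
        if not data:
            break  # reader exhausted
        trainer.train_minibatch(data)
        samples_seen += trainer.previous_minibatch_sample_count
    trainer.summarize_training_progress()
    # required once per process when a distributed learner was created
    C.train.distributed.Communicator.finalize()
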
Example #2
def train(reader, model_func, max_epochs=10):

    # Instantiate the model function; x is the input (feature) variable
    model = model_func(x)

    # Instantiate the loss and error function
    loss, label_error = create_criterion_function_preferred(model, y)

    # training config
    epoch_size = 18000  # 18000 samples is half the dataset size
    minibatch_size = 70

    # LR schedule over epochs
    # In CNTK, an epoch is how often we get out of the minibatch loop to
    # do other stuff (e.g. checkpointing, adjust learning rate, etc.)
    # (we don't run this many epochs, but if we did, these are good values)
    lr_per_sample = [0.003] * 4 + [0.0015] * 24 + [0.0003]
    lr_per_minibatch = [lr * minibatch_size for lr in lr_per_sample]
    lr_schedule = C.learning_rate_schedule(lr_per_minibatch,
                                           C.UnitType.minibatch, epoch_size)

    # Momentum schedule
    momentum_as_time_constant = C.momentum_as_time_constant_schedule(700)

    # We use plain SGD here; the Adam optimizer is also known to work well on
    # this dataset. Feel free to try other optimizers from
    # https://www.cntk.ai/pythondocs/cntk.learner.html#module-cntk.learner
    learner = C.sgd(
        parameters=model.parameters,
        lr=lr_schedule,
        #momentum=momentum_as_time_constant,
        gradient_clipping_threshold_per_sample=15,
        gradient_clipping_with_truncation=True)

    # Setup the progress updater
    progress_printer = C.logging.ProgressPrinter(tag='Training',
                                                 num_epochs=max_epochs)

    # Uncomment below for more detailed logging
    #progress_printer = ProgressPrinter(freq=100, first=10, tag='Training', num_epochs=max_epochs)

    # Instantiate the trainer
    trainer = C.Trainer(model, (loss, label_error), learner, progress_printer)

    # process minibatches and perform model training
    C.logging.log_number_of_parameters(model)

    t = 0
    for epoch in range(max_epochs):  # loop over epochs
        epoch_end = (epoch + 1) * epoch_size
        while t < epoch_end:  # loop over minibatches on the epoch
            data = reader.next_minibatch(
                minibatch_size,
                input_map={  # fetch minibatch
                    x: reader.streams.query,
                    y: reader.streams.slot_labels
                })
            trainer.train_minibatch(data)  # update model with it
            t += data[y].num_samples  # samples so far
        trainer.summarize_training_progress()
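
The loop above scales a per-sample learning-rate list by the minibatch size because the schedule is declared with UnitType.minibatch; a sketch of the (roughly equivalent) per-sample form, reusing the same assumed values:

import cntk as C

minibatch_size = 70
epoch_size = 18000
lr_per_sample = [0.003] * 4 + [0.0015] * 24 + [0.0003]

# same intent as multiplying by minibatch_size and using UnitType.minibatch
lr_schedule = C.learning_rate_schedule(lr_per_sample,
                                       C.UnitType.sample, epoch_size)
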
Example #3
 def create_distributed_learner(self, mode, config):
     local_learner = C.sgd(
         self.z.parameters,
         C.learning_rate_schedule(0.01, unit=C.learners.UnitType.sample))
     try:
         if mode == 'data_parallel':
             if config is None:
                 config = DataParallelConfig(num_quantization_bits=32,
                                             distributed_after=0)
             learner = C.data_parallel_distributed_learner(
                 local_learner,
                 num_quantization_bits=config.num_quantization_bits,
                 distributed_after=config.distributed_after)
         elif mode == 'block_momentum':
             if config is None:
                 # the default config to match data parallel SGD
                 config = BlockMomentumConfig(
                     block_momentum_as_time_constant=0,
                     block_learning_rate=1,
                     block_size=NUM_WORKERS,
                     distributed_after=0)
             learner = C.block_momentum_distributed_learner(
                 local_learner,
                 block_momentum_as_time_constant=config.
                 block_momentum_as_time_constant,
                 block_learning_rate=config.block_learning_rate,
                 block_size=config.block_size,
                 distributed_after=config.distributed_after)
         else:
             learner = local_learner
     except RuntimeError:
         learner = None
     return learner
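
Example #3 relies on DataParallelConfig, BlockMomentumConfig and NUM_WORKERS defined elsewhere in its test module; a minimal sketch of compatible (assumed) definitions:

from collections import namedtuple

NUM_WORKERS = 2  # assumed number of MPI workers in the test

DataParallelConfig = namedtuple(
    'DataParallelConfig', ['num_quantization_bits', 'distributed_after'])

BlockMomentumConfig = namedtuple(
    'BlockMomentumConfig',
    ['block_momentum_as_time_constant', 'block_learning_rate',
     'block_size', 'distributed_after'])
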
Example #4
def test_usermbsource_training(tmpdir):
    input_dim = 1000
    num_output_classes = 5

    mbs = MyDataSource(input_dim, num_output_classes)

    from cntk import sequence, parameter, plus, cross_entropy_with_softmax, \
            classification_error, learning_rate_schedule, sgd, Trainer, \
            training_session, times, UnitType

    feature = sequence.input_variable(shape=(input_dim, ))
    label = C.input_variable(shape=(num_output_classes, ))
    p = parameter(shape=(input_dim, num_output_classes), init=10)
    z = times(sequence.reduce_sum(feature), p, name='z')
    ce = cross_entropy_with_softmax(z, label)
    errs = classification_error(z, label)

    lr_per_sample = learning_rate_schedule([0.3, 0.2, 0.1, 0.0],
                                           UnitType.sample)
    learner = sgd(z.parameters, lr_per_sample)
    trainer = Trainer(z, (ce, errs), [learner])
    input_map = {feature: mbs.fsi, label: mbs.lsi}

    session = training_session(trainer=trainer,
                               mb_source=mbs,
                               model_inputs_to_streams=input_map,
                               mb_size=4,
                               max_samples=20)
    session.train()

    assert trainer.total_number_of_samples_seen == 20
Example #5
 def __call__(self, parameters, opt_learning_rate=0.001, **kwargs):
     lr_per_minibatch = cntk.learning_rate_schedule(
         lr=opt_learning_rate, unit=cntk.UnitType.minibatch)
     momentum = cntk.momentum_schedule(momentum=0.99)
     return cntk.adam_sgd(parameters=parameters,
                          lr=lr_per_minibatch,
                          momentum=momentum)
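
adam_sgd appears to be the pre-2.0 name of the Adam learner; assuming CNTK 2.x, the same factory could be sketched with cntk.adam while keeping the schedules from the example:

import cntk

def adam_factory(parameters, opt_learning_rate=0.001):
    lr_per_minibatch = cntk.learning_rate_schedule(
        lr=opt_learning_rate, unit=cntk.UnitType.minibatch)
    momentum = cntk.momentum_schedule(0.99)
    return cntk.adam(parameters, lr=lr_per_minibatch, momentum=momentum)
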
Example #6
def main():
	cntk_info()
	args = arguments()
	get_data_tmp(folder='./data')
	test_data_available(folder='./data')

	model = Net()

	## CNTK code without formatting
	train_reader = create_reader(ctf_train_file, True, model.input_dim, model.num_output_classes)
	test_reader = create_reader(ctf_test_file, False, model.input_dim, model.num_output_classes)

	# Print the output shapes / parameters of different components
	print("Output Shape of the first convolution layer:", model.z.first_conv.shape)
	print("Bias value of the last dense layer:", model.z.classify.b.value)

	# Number of parameters in the network
	cntk.logging.log_number_of_parameters(model.z)

	# Instantiate the trainer object to drive the model training
	learning_rate = 0.2
	lr_schedule = cntk.learning_rate_schedule(learning_rate, cntk.UnitType.minibatch)
	learner = cntk.sgd(model.z.parameters, lr_schedule)
	trainer = cntk.Trainer(model.z, (model.loss, model.errs), [learner])

	for epoch in range(args.epochs):
		train(epoch, model, train_reader, trainer, args)
		test(epoch, model, test_reader, trainer, args)
Example #7
def create_trainer(network, epoch_size, num_quantization_bits):
    # Set learning parameters
    lr_per_sample = [0.0015625] * 20 + [0.00046875] * 20 + [0.00015625] * 20 + [0.000046875] * 10 + [0.000015625]
    lr_schedule = cntk.learning_rate_schedule(
        lr_per_sample,
        unit=cntk.learner.UnitType.sample,
        epoch_size=epoch_size)
    mm_time_constant = [0] * 20 + [600] * 20 + [1200]
    mm_schedule = cntk.learner.momentum_as_time_constant_schedule(
        mm_time_constant, epoch_size=epoch_size)
    l2_reg_weight = 0.002

    # Create learner
    learner = data_parallel_distributed_learner(
        cntk.learner.momentum_sgd(network['output'].parameters,
                                  lr_schedule,
                                  mm_schedule,
                                  unit_gain=True,
                                  l2_regularization_weight=l2_reg_weight),
        num_quantization_bits=num_quantization_bits,
        distributed_after=0)

    # Create trainer
    return cntk.Trainer(network['output'], (network['ce'], network['pe']),
                        learner)
Example #8
def create_trainer(network, epoch_size, num_quantization_bits, block_size, warm_up, progress_writers):
    # Set learning parameters
    lr_per_sample     = [0.0015625]*20 + [0.00046875]*20 + [0.00015625]*20 + [0.000046875]*10 + [0.000015625]
    lr_schedule       = C.learning_rate_schedule(lr_per_sample, unit=C.learners.UnitType.sample, epoch_size=epoch_size)
    mm_time_constant  = [0]*20 + [600]*20 + [1200]
    mm_schedule       = C.learners.momentum_as_time_constant_schedule(mm_time_constant, epoch_size=epoch_size)
    l2_reg_weight     = 0.002

    # Create learner
    if block_size is not None and num_quantization_bits != 32:
        raise RuntimeError("Block momentum cannot be used with quantization, please remove quantized_bits option.")

    local_learner = C.learners.momentum_sgd(network['output'].parameters,
                                            lr_schedule, mm_schedule,
                                            l2_regularization_weight=l2_reg_weight)

    if block_size is not None:
        parameter_learner = C.train.distributed.block_momentum_distributed_learner(local_learner, block_size=block_size)
    else:
        parameter_learner = C.train.distributed.data_parallel_distributed_learner(local_learner, 
                                                                                  num_quantization_bits=num_quantization_bits, 
                                                                                  distributed_after=warm_up)

    # Create trainer
    return C.Trainer(network['output'], (network['ce'], network['pe']), parameter_learner, progress_writers)
Example #9
def train(streamf):
    global net
    net=nn(input_var)
    loss = cntk.losses.squared_error(net,label_var)
    error=cntk.squared_error(net,label_var)
    learning_rate=0.01
    lr_schedule=cntk.learning_rate_schedule(learning_rate,cntk.UnitType.minibatch)
    momentum_time_constant = cntk.momentum_as_time_constant_schedule(140 / -np.math.log(0.9))
    learner=cntk.fsadagrad(net.parameters,lr=lr_schedule,momentum = momentum_time_constant,unit_gain = True)
    progres=cntk.logging.ProgressPrinter(0)
    trainer=cntk.Trainer(net,(loss,error),[learner],progress_writers=progres)
    input_map={
        input_var : streamf.streams.features,
        label_var : streamf.streams.labels
        
    }
    minibatch_size =  1024
    max_epochs = 500
    epoch_size = 48985
    t = 0
    for epoch in range(max_epochs):
        epoch_end = (epoch+1) * epoch_size
        while t < epoch_end: 
            dat1=streamf.next_minibatch(minibatch_size,input_map = input_map)
            trainer.train_minibatch(dat1)
            t += dat1[label_var].num_samples
    trainer.summarize_training_progress()
    return trainer
Example #10
    def __init__(self, n_in, n_out, init_lr, momentum):

        self.param1 = 512
        self.param2 = 256

        self.n_in = int(n_in)
        self.n_out = int(n_out)
        self.input = C.sequence.input_variable(shape=(self.n_in,))
        self.label = C.sequence.input_variable(shape=(self.n_out,))

        self.three_dnn = C.layers.Sequential([
            C.layers.Dense(self.param1, activation=C.tanh, name='dnn_three_1'),
            C.layers.Dense(self.param1, activation=C.tanh, name='dnn_three_2'),
            C.layers.Dense(self.param1, activation=C.tanh, name='dnn_three_3')])
        self.final_dnn = C.layers.Dense(self.n_out, name='dnn_final')
        self.dnn_1 = C.layers.Dense(8 * self.param2, bias=False, name='dnn_1')
        self.dnn_2 = C.layers.Dense(8 * self.param2, bias=False, name='dnn_2')
        self.dnn_3 = C.layers.Dense(8 * self.param2, bias=False, name='dnn_3')
        self.dnn_4 = C.layers.Dense(8 * self.param2, bias=False, name='dnn_4')
        self.list_bias = []
        for i in range(16):
            self.list_bias.append(C.parameter(shape=(self.param2, ), name='bias_' + str(i)))

        self.output = self.model(self.input)

        self.loss = loss_fun(self.output, self.label)
        self.eval_err = loss_fun(self.output, self.label)

        self.lr_s = C.learning_rate_schedule(init_lr, C.UnitType.sample)
        self.mom_s = C.momentum_schedule(momentum)
        self.learner = C.momentum_sgd(self.output.parameters, lr=self.lr_s, momentum=self.mom_s)
        self.trainer = C.Trainer(self.output, (self.loss, self.eval_err), [self.learner])
Example #11
def test_model_not_criterion_subset():
    input_dim = 2
    proj_dim = 11
    model1_dim = 3
    model2_dim = 4
    x = sequence.input_variable((input_dim,))

    core = C.layers.Embedding(proj_dim)
    model1 = C.layers.Dense(model1_dim)(sequence.last(core(x)))
    model1_label = C.input_variable((model1_dim,))
    ce_model1 = cross_entropy_with_softmax(model1, model1_label)
    pe_model1 = classification_error(model1, model1_label)

    model2 = C.layers.Dense(model2_dim)(core(x))
    model2_label = sequence.input_variable((model2_dim,))
    ce_model2 = cross_entropy_with_softmax(model2, model2_label)
    pe_model2 = classification_error(model2, model2_label)

    ce = 0.5 * sequence.reduce_sum(ce_model2) + 0.5 * ce_model1

    lr_schedule = C.learning_rate_schedule(0.003, C.UnitType.sample)
    trainer_multitask = C.Trainer(model1, (ce, pe_model1), C.sgd(ce.parameters, lr=lr_schedule))

    x_data = np.asarray([[2., 1.], [1., 2.]], np.float32)
    model1_label_data = np.asarray([1., 0., 0.], np.float32)
    model2_label_data = np.asarray([[0., 1., 0., 0.], [0., 0., 0., 1.]], np.float32)
    trainer_multitask.train_minibatch({x : [x_data], model1_label : [model1_label_data], model2_label : [model2_label_data]})
Example #12
def test_trainer_with_some_params_not_learned():
    input_dim = 2
    proj_dim = 2
    x = C.input_variable(shape=(input_dim,))
    W = parameter(shape=(input_dim, proj_dim), init=C.glorot_uniform())
    B = parameter(shape=(proj_dim,), init=C.glorot_uniform())
    t = times(x, W)
    z = t + B

    W_orig_value = W.value
    B_orig_value = B.value

    labels = C.input_variable(shape=(proj_dim,))
    ce = cross_entropy_with_softmax(z, labels)
    pe = classification_error(z, labels)

    lr_per_sample = C.learning_rate_schedule(0.1, C.UnitType.sample)
    trainer = C.Trainer(z, (ce, pe), C.sgd([W], lr_per_sample))

    x_value = [[1, 1],[2, 2]]
    label_value = [[0, 1], [1, 0]]
    arguments = {x: x_value, labels: label_value}

    num_iters = 3
    for i in range(num_iters):
        trainer.train_minibatch(arguments)

        assert np.array_equal(B.value, B_orig_value)
        assert not np.array_equal(W.value, W_orig_value)
        W_orig_value = W.value

    trainer.test_minibatch(arguments)
Example #13
def create_network(para, verbose=False):
    with cntk.layers.default_options(init=cntk.glorot_uniform(), activation=cntk.ops.relu):
        # In order to accelerate the debugging step, we choose a simple structure with only 2 parameters

        h = cntk.layers.Convolution2D(filter_shape=(5, 5), num_filters=para[0],
                                      strides=(1, 1), pad=True, name='C1')(network_input / 255.0)
        h = cntk.layers.layers.MaxPooling(filter_shape=(5, 5), strides=(2, 2), )(h)

        h = cntk.layers.Convolution2D(filter_shape=(5, 5), num_filters=para[1],
                                      strides=(1, 1), pad=True, name='C2')(h)
        h = cntk.layers.layers.MaxPooling(filter_shape=(5, 5), strides=(2, 2))(h)

        h = cntk.layers.Convolution2D(filter_shape=(3, 3), num_filters=para[2],
                                      strides=(1, 1), pad=True, name='C2')(h)

        h = cntk.layers.Dense(para[3])(h)

        h = cntk.layers.Dropout(0.25)(h)

        z = cntk.layers.Dense(10, activation=None, name='R')(h)
    loss = cntk.cross_entropy_with_softmax(z, network_label)
    label_error = cntk.classification_error(z, network_label)
    lr_schedule = cntk.learning_rate_schedule(0.1, cntk.UnitType.minibatch)
    learner = cntk.momentum_sgd(z.parameters, lr_schedule, cntk.momentum_schedule(0.9))
    trainer = cntk.Trainer(z, (loss, label_error), [learner])
    if verbose: log = cntk.logging.ProgressPrinter(100)
    for _ in range(20000):
        data = train_reader.next_minibatch(100, input_map=mapping(train_reader))
        trainer.train_minibatch(data)
        if verbose: log.update_with_trainer(trainer)
    return trainer
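
Example #13 uses module-level network_input, network_label, train_reader and mapping that are not shown; a hedged sketch of compatible definitions for MNIST-shaped CTF data (file name, stream names and shapes are all assumptions):

import cntk
from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs

network_input = cntk.input_variable((1, 28, 28))  # grayscale image
network_label = cntk.input_variable(10)           # one-hot digit label

def create_mnist_reader(path, is_training):
    # assumes a CTF file with |features (784 values) and |labels (10 values)
    return MinibatchSource(
        CTFDeserializer(path, StreamDefs(
            features=StreamDef(field='features', shape=28 * 28),
            labels=StreamDef(field='labels', shape=10))),
        randomize=is_training)

def mapping(reader):
    return {network_input: reader.streams.features,
            network_label: reader.streams.labels}

train_reader = create_mnist_reader('Train-28x28_cntk_text.txt', True)
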
Example #14
def test_trainer(tmpdir, no_eval_function):
    in1 = C.input_variable(shape=(1,))
    labels = C.input_variable(shape=(1,))
    p = parameter(shape=(2,), init=10)
    z = plus(in1, reduce_sum(p), name='z')
    ce = cross_entropy_with_softmax(z, labels)
    if no_eval_function:
        errs = None
    else:
        errs = classification_error(z, labels)

    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = C.learning_rate_schedule(0.007, C.UnitType.sample)
    trainer = C.Trainer(z, (ce, errs),
            [C.momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant, True)])
    in1_value = [[1],[2]]
    label_value = [[0], [1]]
    arguments = {in1: in1_value, labels: label_value}
    z_output = z.output
    updated, var_map = trainer.train_minibatch(arguments, outputs=[z_output])

    p = str(tmpdir / 'checkpoint.dat')
    external_state = {"additional external state":math.pi, "nested dict":{"a":"b"}, "list":[1,2,3]}
    trainer.save_checkpoint(p, external_state)
    restored_state = trainer.restore_from_checkpoint(p)

    assert external_state == restored_state

    assert trainer.model.name == 'z'

    # Ensure that Swig is not leaking raw types
    assert isinstance(trainer.model, Function)
    assert trainer.model.__doc__
    assert isinstance(trainer.parameter_learners[0], C.Learner)
Example #15
def create_trainer(network, epoch_size, num_quantization_bits,
                   progress_printer):
    # Set learning parameters
    lr_per_mb = [0.01] * 20 + [0.001] * 20 + [0.0001] * 20 + [0.00001] * 10 + [0.000001]
    lr_schedule = cntk.learning_rate_schedule(
        lr_per_mb, unit=cntk.learner.UnitType.minibatch, epoch_size=epoch_size)
    mm_schedule = cntk.learner.momentum_schedule(0.9)
    l2_reg_weight = 0.0005  # CNTK L2 regularization is per sample, thus same as Caffe

    # Create learner
    local_learner = cntk.learner.momentum_sgd(
        network['output'].parameters,
        lr_schedule,
        mm_schedule,
        unit_gain=False,
        l2_regularization_weight=l2_reg_weight)
    # Since we reuse parameter settings (learning rate, momentum) from Caffe, we set unit_gain to False to ensure consistency
    parameter_learner = data_parallel_distributed_learner(
        local_learner,
        num_quantization_bits=num_quantization_bits,
        distributed_after=0)

    # Create trainer
    return cntk.Trainer(network['output'], (network['ce'], network['pe']),
                        parameter_learner, progress_printer)
Example #16
def init_model(m):
    progress_writers = [
        cntk.logging.ProgressPrinter(
            freq=int(BATCHSIZE / 2),
            rank=cntk.train.distributed.Communicator.rank(),
            num_epochs=EPOCHS)
    ]

    # Loss (dense labels); check if support for sparse labels
    loss = cntk.cross_entropy_with_softmax(m, labels)
    # Momentum SGD
    # https://github.com/Microsoft/CNTK/blob/master/Manual/Manual_How_to_use_learners.ipynb
    # unit_gain=False: momentum_direction = momentum*old_momentum_direction + gradient
    # if unit_gain=True then ...(1-momentum)*gradient
    local_learner = cntk.momentum_sgd(
        m.parameters,
        lr=cntk.learning_rate_schedule(LR, cntk.UnitType.minibatch),
        momentum=cntk.momentum_schedule(MOMENTUM),
        unit_gain=False)

    distributed_learner = cntk.train.distributed.data_parallel_distributed_learner(
        local_learner)

    trainer = cntk.Trainer(m, (loss, cntk.classification_error(m, labels)),
                           [distributed_learner], progress_writers)

    return trainer, distributed_learner
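
To make the unit_gain comment concrete: with momentum m, previous direction v and gradient g, the classic direction is m*v + g, while the unit-gain variant weights the gradient by (1 - m). A toy NumPy check of the two formulas quoted in the comment:

import numpy as np

m = 0.9                      # MOMENTUM in the example
g = np.array([1.0, -2.0])    # some gradient
v = np.zeros_like(g)         # previous momentum direction

classic   = m * v + g              # unit_gain=False
unit_gain = m * v + (1 - m) * g    # unit_gain=True
print(classic, unit_gain)
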
Example #17
def test_ext_backpropstate(payload):
    class TestBackPropState(UserFunction):
        def __init__(self, arg, payload, name='f1'):
            self.payload = payload
            super(TestBackPropState, self).__init__([arg])

        def infer_outputs(self):
            return [
                C.output_variable(self.inputs[0].shape, self.inputs[0].dtype,
                                  self.inputs[0].dynamic_axes)
            ]

        def forward(self, argument, device=None, outputs_to_retain=None):
            return self.payload, argument

        def backward(self, state, root_gradients):
            assert state == self.payload
            return root_gradients

    dim = 4

    p = C.parameter(shape=(dim, ), init=10)
    in1 = C.input_variable(dim, needs_gradient=True, name='i_var')
    m = C.user_function(TestBackPropState(in1, payload))
    z = m + p

    lr_per_sample = C.learning_rate_schedule(0.007, C.UnitType.sample)
    trainer = C.Trainer(None, (z), [C.sgd(z.parameters, lr_per_sample)])

    for i in range(100):
        input_data = np.random.rand(dim)
        trainer.train_minibatch({in1: [input_data]})
Example #18
def train(streamf):
    global net
    minibatch_size = 1024
    max_epochs = 2000
    epoch_size = 50000
    net = nn(input_var)
    loss = cntk.losses.binary_cross_entropy(net, label_var)
    error = cntk.classification_error(net, label_var)
    lr_per_sample = [3e-4] * 4 + [1.5e-4]
    lr_per_minibatch = [lr * minibatch_size for lr in lr_per_sample]
    lr_schedule = cntk.learning_rate_schedule(lr_per_minibatch,
                                              cntk.UnitType.minibatch)
    momentum_as_time_constant = cntk.momentum_as_time_constant_schedule(700)
    learner = cntk.adam(net.parameters,
                        lr_schedule,
                        momentum=momentum_as_time_constant,
                        gradient_clipping_threshold_per_sample=15,
                        gradient_clipping_with_truncation=True)
    progres = cntk.logging.ProgressPrinter(0)
    trainer = cntk.Trainer(net, (loss, error), [learner],
                           progress_writers=progres)
    input_map = {
        input_var: streamf.streams.features,
        label_var: streamf.streams.labels
    }
    t = 0
    for epoch in range(max_epochs):
        epoch_end = (epoch + 1) * epoch_size
        while t < epoch_end:
            dat1 = streamf.next_minibatch(minibatch_size, input_map=input_map)
            trainer.train_minibatch(dat1)
            t += dat1[label_var].num_samples
    trainer.summarize_training_progress()
    return trainer
Example #19
def test_empty_minibatch():
    scalar = C.input_variable((1,), dtype=np.float32, name='tscalar')
    op = scalar + parameter(init=np.asarray([1]), dtype=np.float32)

    lr_per_sample = C.learning_rate_schedule(0.1, C.UnitType.sample)
    trainer = C.Trainer(op, (op, None), C.sgd(op.parameters, lr_per_sample))
    trainer.train_minibatch({})
Example #20
def test_ext_backpropstate(payload):

    class TestBackPropState(UserFunction):
        def __init__(self, arg, payload, name='f1'):
            self.payload = payload
            super(TestBackPropState, self).__init__([arg])

        def infer_outputs(self):
            return [C.output_variable(self.inputs[0].shape, self.inputs[0].dtype, self.inputs[0].dynamic_axes)]

        def forward(self, argument, device=None, outputs_to_retain=None):
            return self.payload, argument

        def backward(self, state, root_gradients):
            assert state == self.payload
            return root_gradients

    dim = 4

    p = C.parameter(shape=(dim,), init=10)
    in1 = C.input_variable(dim, needs_gradient=True, name='i_var')
    m = C.user_function(TestBackPropState(in1, payload))
    z = m + p

    lr_per_sample = C.learning_rate_schedule(0.007, C.UnitType.sample)
    trainer = C.Trainer(None, (z), [C.sgd(z.parameters, lr_per_sample)])

    for i in range(100):
        input_data = np.random.rand(dim)
        trainer.train_minibatch({in1: [input_data]})
Example #21
 def __init__(self,
              feature_vector,
              target_vector,
              action_vector,
              velocity,
              load_model=True,
              testing=False,
              max_velocity=0.31,
              learning_rate=0.5,
              name='action_predicter'):
     self._load_model = load_model
     self._input_size = feature_vector
     self._output_size = action_vector
     self._target_size = target_vector
     self._velocity_size = velocity
     self._input = C.sequence.input_variable(self._input_size)
     self._target = C.sequence.input_variable(self._target_size)
     self._output = C.sequence.input_variable(self._output_size)
     self._output_velocity = C.sequence.input_variable(self._velocity_size)
     self.name = name
     self._max_velocity = max_velocity
     self._batch_size = 8
     self._max_iter = 1000000
     self._lr_schedule = C.learning_rate_schedule(
         [learning_rate * (0.999**i) for i in range(1000)],
         C.UnitType.sample,
         epoch_size=self._max_iter * self._batch_size)
     #self._model,self._loss, self._learner, self._trainer = self.create_model()
     if testing:
         self._model = self.load_models()
     else:
         self._model, self._loss, self._learner, self._trainer = self.create_model(
         )
     self._predicted = {}
Example #22
def create_trainer(network, epoch_size, num_quantization_bits, block_size, warm_up, progress_writers):
    # Set learning parameters
    lr_per_sample     = [0.0015625]*20 + [0.00046875]*20 + [0.00015625]*20 + [0.000046875]*10 + [0.000015625]
    lr_schedule       = C.learning_rate_schedule(lr_per_sample, unit=C.learners.UnitType.sample, epoch_size=epoch_size)
    mm_time_constant  = [0]*20 + [600]*20 + [1200]
    mm_schedule       = C.learners.momentum_as_time_constant_schedule(mm_time_constant, epoch_size=epoch_size)
    l2_reg_weight     = 0.002

    # Create learner
    if block_size is not None and num_quantization_bits != 32:
        raise RuntimeError("Block momentum cannot be used with quantization, please remove quantized_bits option.")

    local_learner = C.learners.momentum_sgd(network['output'].parameters,
                                            lr_schedule, mm_schedule,
                                            l2_regularization_weight=l2_reg_weight)

    if block_size is not None:
        parameter_learner = C.train.distributed.block_momentum_distributed_learner(local_learner, block_size=block_size)
    else:
        parameter_learner = C.train.distributed.data_parallel_distributed_learner(local_learner, 
                                                                                  num_quantization_bits=num_quantization_bits, 
                                                                                  distributed_after=warm_up)

    # Create trainer
    return C.Trainer(network['output'], (network['ce'], network['pe']), parameter_learner, progress_writers)
Example #23
def create_trainer(network, epoch_size, num_epochs, minibatch_size, num_quantization_bits, progress_printer):
    
    # CNTK weights new gradient by (1-momentum) for unit gain, 
    # thus we divide Caffe's learning rate by (1-momentum)
    initial_learning_rate = 2.0 # equal to 0.2 in caffe
    initial_learning_rate *= minibatch_size / 128
    learn_rate_adjust_interval = 2
    learn_rate_decrease_factor = 0.94

    # Set learning parameters
    lr_per_mb = []
    learning_rate = initial_learning_rate
    for i in range(0, num_epochs, learn_rate_adjust_interval):
        lr_per_mb.extend([learning_rate] * learn_rate_adjust_interval)
        learning_rate *= learn_rate_decrease_factor

    lr_schedule       = cntk.learning_rate_schedule(lr_per_mb, unit=cntk.learner.UnitType.minibatch, epoch_size=epoch_size)
    mm_schedule       = cntk.learner.momentum_schedule(0.9)
    l2_reg_weight     = 0.0001 # CNTK L2 regularization is per sample, thus same as Caffe
    
    # Create learner
    local_learner = cntk.learner.momentum_sgd(network['output'].parameters, lr_schedule, mm_schedule, 
                                                l2_regularization_weight=l2_reg_weight)
    parameter_learner = data_parallel_distributed_learner(
        local_learner, 
        num_quantization_bits=num_quantization_bits,
        distributed_after=0)

    # Create trainer
    return cntk.Trainer(network['output'], (network['ce'], network['pe']), parameter_learner, progress_printer)
Example #24
    def train(self, report_freq = 500, as_policy=True):        
        #loss = C.ops.minus(0, C.ops.argmin(self.model) -  C.ops.argmin(self.model) + C.ops.minus(self.label_var, 0))
        loss = C.squared_error(self.model, self.label_var)
        evaluation = C.squared_error(self.model, self.label_var)
        schedule = C.momentum_schedule(self.hp.learning_rate)
        progress_printer = C.logging.ProgressPrinter(num_epochs=self.hp.epochs/self.hp.minibatch_size)
        learner = C.adam(self.model.parameters, 
                     C.learning_rate_schedule(self.hp.learning_rate, C.UnitType.minibatch), 
                     momentum=schedule, 
                     l1_regularization_weight=self.hp.l1reg,
                     l2_regularization_weight=self.hp.l2reg
                     )
        trainer = C.Trainer(self.model, (loss, evaluation), learner, progress_printer)
        self.plotdata = {"loss":[]}
        for epoch in range(self.hp.epochs):
            indata, label, total_reward = self.get_next_data(self.hp.minibatch_size, as_policy)
            data = {self.input_var: indata, self.label_var: label}
            trainer.train_minibatch(data)
            loss = trainer.previous_minibatch_loss_average
            if not (loss == "NA"):
                self.plotdata["loss"].append(loss)
            if epoch % report_freq == 0:
                print()
                print("last epoch total reward: {}".format(total_reward))
                trainer.summarize_training_progress()
                print()
#             if self.hp.stop_loss > loss:
#                 break
        print()
        trainer.summarize_training_progress()
Example #25
def train(train_reader, test_reader, model_func, num_sweeps_to_train_with=10):
    
    model = model_func(x/255)

    loss, label_error = create_criterion_function(model, y)

    learning_rate = 0.2
    lr_schedule = C.learning_rate_schedule(learning_rate, C.UnitType.minibatch)
    learner = C.sgd(model.parameters, lr_schedule)
    trainer = C.Trainer(model, (loss, label_error), [learner])

    minibatch_size = util.BATCH_SIZE
    num_samples_per_sweep = util.N
    num_minibatches_to_train = util.EPOCHS

    input_map = {
        y : train_reader.streams.labels,
        x : train_reader.streams.features
    }

    training_progress_output_freq = 500

    train_loss = []
    train_acc = []

    for i in range(0, int(num_minibatches_to_train)):
        
        data = train_reader.next_minibatch(minibatch_size, input_map=input_map)
        trainer.train_minibatch(data)
        print_training_progress(trainer, i, training_progress_output_freq, verbose=1)

        train_loss.append(trainer.previous_minibatch_loss_average)
        train_acc.append(1 - trainer.previous_minibatch_evaluation_average)

    return train_loss, train_acc
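
print_training_progress (used here and again in Example #43) is not shown; the CNTK tutorials it appears to come from define it roughly as follows, with return values matching how the callers unpack them:

def print_training_progress(trainer, mb, frequency, verbose=1):
    training_loss = "NA"
    eval_error = "NA"
    if mb % frequency == 0:
        training_loss = trainer.previous_minibatch_loss_average
        eval_error = trainer.previous_minibatch_evaluation_average
        if verbose:
            print("Minibatch: {0}, Loss: {1:.4f}, Error: {2:.2f}%".format(
                mb, training_loss, eval_error * 100))
    return mb, training_loss, eval_error
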
Example #26
 def __init__(self,
              feature_vector,
              target_vector,
              action_vector,
              velocity,
              max_velocity,
              learning_rate,
              name='action_predicter'):
     self._load_model = False
     self._input_size = (feature_vector[0] + 1, feature_vector[1])
     self._output_size = action_vector
     self._target_size = target_vector
     self._velocity_size = velocity
     self._input = C.sequence.input_variable(self._input_size)
     self._target = C.sequence.input_variable(self._target_size)
     self._output = C.sequence.input_variable(self._output_size)
     self._output_velocity = C.sequence.input_variable(self._velocity_size)
     self.name = name
     self._max_velocity = max_velocity
     self._batch_size = 8
     self._max_iter = 1000000
     self._lr_schedule = C.learning_rate_schedule(
         [learning_rate * (0.995**i) for i in range(10000)],
         C.UnitType.sample,
         epoch_size=round(self._max_iter * self._batch_size / 100))
     self._model, self._loss, self._learner, self._trainer = self.create_model(
     )
     self._predicted = {}
Example #27
def test_session_progress_print_on_sweep_unit(tmpdir, device_id):
    device = cntk_device(device_id)
    writer = MockProgressWriter()
    # set a higher learning rate: we don't need the model to converge, just to go through all the samples
    t, feature, label = create_sample_model(
        device,
        writer,
        lr_per_sample=C.learning_rate_schedule(0.3, C.UnitType.sample))
    mbs = mb_source(
        tmpdir,
        "training",
        #max_samples=INFINITELY_REPEAT,
        max_sweeps=4)

    input_map = {feature: mbs.streams.features, label: mbs.streams.labels}

    test_dir = str(tmpdir)

    C.training_session(
        trainer=t,
        mb_source=mbs,
        mb_size=C.minibatch_size_schedule(5),
        model_inputs_to_streams=input_map,
        max_samples=FULL_DATA_SWEEP,
        progress_frequency=(2, C.train.DataUnit.sweep)).train(device)
    #4 sweeps of 25 samples = 100 samples
    assert (t.total_number_of_samples_seen == 100)
    # output every 2 sweeps; with 4 sweeps in total, 2 outputs are written by the end
    assert (writer.training_summary_counter == 2)
Example #28
def trainDNN(trainX, trainY):
    numOutputClasses = 2

    newCol = np.where(trainY == 0, 1, 0)
    newCol = pd.DataFrame(newCol)
    trainY = trainY.reset_index(drop=True)
    trainY = pd.concat([trainY, newCol], axis=1, ignore_index=True)
    inputDim = trainX.shape[1]
    trainX = np.ascontiguousarray(trainX.as_matrix().astype(np.float32))
    trainY = np.ascontiguousarray(trainY.as_matrix().astype(np.float32))

    input = C.input_variable(inputDim)
    label = C.input_variable(numOutputClasses)

    classifier = create_model(input)
    loss = C.cross_entropy_with_softmax(classifier, label)
    evalError = C.classification_error(classifier, label)

    learning_rate = 0.5
    lrSchedule = C.learning_rate_schedule(learning_rate, C.UnitType.minibatch)
    learner = C.sgd(classifier.parameters, lrSchedule)
    trainer = C.Trainer(classifier, (loss, evalError), [learner])

    minibatchSize = 25
    numSamples = trainX.shape[0] - (trainX.shape[0] % 25)
    numMinibatchesToTrain = numSamples / minibatchSize

    #train the model
    for i in range(0, int(numMinibatchesToTrain)):
        trainX, trainY, features, labels = getMinibatch(
            trainX, trainY, minibatchSize)
        trainer.train_minibatch({input: features, label: labels})

    return [classifier, trainer, input, label]
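
Example #28 calls create_model and getMinibatch, which are not shown; hedged sketches of both (layer sizes and the rotation-based batching are assumptions, only the shapes follow the snippet):

import numpy as np
import cntk as C

def create_model(features, hidden_dim=64, num_classes=2):
    with C.layers.default_options(init=C.glorot_uniform(), activation=C.relu):
        h = C.layers.Dense(hidden_dim)(features)
        h = C.layers.Dense(hidden_dim)(h)
        return C.layers.Dense(num_classes, activation=None)(h)

def getMinibatch(trainX, trainY, minibatchSize):
    # take the first minibatchSize rows, then rotate them to the back
    features, labels = trainX[:minibatchSize], trainY[:minibatchSize]
    trainX = np.roll(trainX, -minibatchSize, axis=0)
    trainY = np.roll(trainY, -minibatchSize, axis=0)
    return trainX, trainY, features, labels
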
Example #29
def create_trainer(network, epoch_size, num_quantization_bits, warm_up,
                   progress_writers):
    print('Creating the trainer.')
    # Train only the last layers
    lr_schedule = C.learning_rate_schedule([0.01] * 10 + [0.001] * 20 +
                                           [0.0001] * 30,
                                           unit=C.UnitType.minibatch)
    mm_schedule = C.momentum_schedule(0.9)
    l2_reg_weight = 0.0001

    learner = C.adam(network['output'].parameters,
                     lr_schedule,
                     mm_schedule,
                     l2_regularization_weight=l2_reg_weight,
                     unit_gain=False)

    num_workers = C.distributed.Communicator.num_workers()
    print('Number of workers: {}'.format(num_workers))
    if num_workers > 1:
        parameter_learner = C.train.distributed.data_parallel_distributed_learner(
            learner, num_quantization_bits=num_quantization_bits)
        trainer = C.Trainer(network['output'], (network['ce'], network['pe']),
                            parameter_learner, progress_writers)
    else:
        trainer = C.Trainer(network['output'], (network['ce'], network['pe']),
                            learner, progress_writers)

    return trainer
Example #30
def create_trainer():
    loss, label_error = create_criterion_function_preferred(dec, y)

    schedule_step = print_freq
    lr_per_sample = [2e-3] * 2 * schedule_step + [1e-3] * 2 * schedule_step + [5e-4]
    lr_per_minibatch = [lr * minibatch_size for lr in lr_per_sample]
    lr_schedule = C.learning_rate_schedule(lr_per_minibatch,
                                           C.UnitType.minibatch, epoch_size)

    momentum_as_time_constant = C.momentum_as_time_constant_schedule(1000)
    learner = C.adam(parameters=dec.parameters,
                     lr=lr_schedule,
                     momentum=momentum_as_time_constant,
                     gradient_clipping_threshold_per_sample=15,
                     gradient_clipping_with_truncation=True)

    progress_printer = C.logging.ProgressPrinter(tag='Training',
                                                 num_epochs=num_epochs)
    trainer = C.Trainer(dec, (loss, label_error), learner, progress_printer)
    if restore:
        trainer.restore_from_checkpoint("model-5.cntk")
    C.logging.log_number_of_parameters(dec)
    return trainer
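
create_criterion_function_preferred (also used in Example #2) is not shown; in the CNTK sequence-labeling tutorial it is essentially the following (a sketch):

import cntk as C

def create_criterion_function_preferred(model, labels):
    ce = C.cross_entropy_with_softmax(model, labels)
    errs = C.classification_error(model, labels)
    return ce, errs
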
Example #31
def train(streamf):
    global net
    minibatch_size = 512
    max_epochs = 2000
    epoch_size = 48985
    net = nn(input_s, input_h, input_l, input_v)
    loss = cntk.losses.cross_entropy_with_softmax(net, label_var)
    error = cntk.classification_error(net, label_var)
    lr_per_sample = [3e-4] * 4 + [1.5e-4]
    lr_per_minibatch = [lr * minibatch_size for lr in lr_per_sample]
    lr_schedule = cntk.learning_rate_schedule(lr_per_minibatch,
                                              cntk.UnitType.minibatch)
    momentum_as_time_constant = cntk.momentum_as_time_constant_schedule(700)
    learner = cntk.fsadagrad(net.parameters, lr_schedule,
                             momentum_as_time_constant)
    progres = cntk.logging.ProgressPrinter(0)
    trainer = cntk.Trainer(net, (loss, error), [learner],
                           progress_writers=progres)
    input_map = {
        input_s: streamf.streams.spread,
        input_h: streamf.streams.high,
        input_l: streamf.streams.low,
        input_v: streamf.streams.volume,
        label_var: streamf.streams.labels
    }
    t = 0
    for epoch in range(max_epochs):
        epoch_end = (epoch + 1) * epoch_size
        while t < epoch_end:
            dat1 = streamf.next_minibatch(minibatch_size, input_map=input_map)
            trainer.train_minibatch(dat1)
            t += dat1[label_var].num_samples
    trainer.summarize_training_progress()
    return trainer
Example #32
def train(streamf):
    input_var = cntk.input_variable(45,np.float32, name = 'features',dynamic_axes=cntk.axis.Axis.default_input_variable_dynamic_axes())
    label_var=cntk.input_variable(3,np.float32, name = 'labels')
    net=nn(input_var)
    loss = cntk.squared_error(net,label_var)
    error=cntk.squared_error(net,label_var)
    learning_rate=0.02
    lr_schedule=cntk.learning_rate_schedule(learning_rate,cntk.UnitType.minibatch)
    momentum_time_constant = cntk.momentum_as_time_constant_schedule(5000 / -np.math.log(0.9))
    learner=cntk.fsadagrad(net.parameters,lr=lr_schedule,momentum = momentum_time_constant,unit_gain = True)
    progres=cntk.logging.ProgressPrinter(0)
    trainer=cntk.Trainer(net,(loss,error),[learner],progress_writers=progres)
    input_map={
        input_var : streamf.streams.features,
        label_var : streamf.streams.labels
        
    }
    minibatch_size =  5000
    num_samples_per_sweep = 2000
    for i in range(0,num_samples_per_sweep):
        dat1=streamf.next_minibatch(minibatch_size,input_map = input_map)
        trainer.train_minibatch(dat1)
        training_loss = trainer.previous_minibatch_loss_average
        eval_error = trainer.previous_minibatch_evaluation_average
        if training_loss<0.002:
            break
    return trainer
Example #33
def create_trainer(network, epoch_size, num_epochs, minibatch_size, num_quantization_bits, progress_printer):
    
    # CNTK weights new gradient by (1-momentum) for unit gain, 
    # thus we divide Caffe's learning rate by (1-momentum)
    initial_learning_rate = 2.0 # equal to 0.2 in caffe
    initial_learning_rate *= minibatch_size / 128
    learn_rate_adjust_interval = 2
    learn_rate_decrease_factor = 0.94

    # Set learning parameters
    lr_per_mb = []
    learning_rate = initial_learning_rate
    for i in range(0, num_epochs, learn_rate_adjust_interval):
        lr_per_mb.extend([learning_rate] * learn_rate_adjust_interval)
        learning_rate *= learn_rate_decrease_factor

    lr_schedule       = cntk.learning_rate_schedule(lr_per_mb, unit=cntk.learner.UnitType.minibatch, epoch_size=epoch_size)
    mm_schedule       = cntk.learner.momentum_schedule(0.9)
    l2_reg_weight     = 0.0001 # CNTK L2 regularization is per sample, thus same as Caffe
    
    # Create learner
    local_learner = cntk.learner.momentum_sgd(network['output'].parameters, lr_schedule, mm_schedule, 
                                                l2_regularization_weight=l2_reg_weight)
    parameter_learner = data_parallel_distributed_learner(
        local_learner, 
        num_quantization_bits=num_quantization_bits,
        distributed_after=0)

    # Create trainer
    return cntk.Trainer(network['output'], (network['ce'], network['pe']), parameter_learner, progress_printer)
Example #34
 def set_optimizer(self, opt_type, opt_conf):
     if opt_type == 'SGD':
         self.lr_schedule = C.learning_rate_schedule(
             opt_conf['lr'], C.UnitType.minibatch)
         self.m_schedule = C.momentum_schedule(
             opt_conf['momentum'], C.UnitType.minibatch)
     else:
         raise NotImplementedError
Example #35
def test_htk_deserializers():
    mbsize = 640
    epoch_size = 1000 * mbsize
    lr = [0.001]

    feature_dim = 33
    num_classes = 132
    context = 2

    os.chdir(data_path)

    features_file = "glob_0000.scp"
    labels_file = "glob_0000.mlf"
    label_mapping_file = "state.list"

    fd = HTKFeatureDeserializer(
        StreamDefs(amazing_features=StreamDef(
            shape=feature_dim, context=(context, context), scp=features_file)))

    ld = HTKMLFDeserializer(
        label_mapping_file,
        StreamDefs(
            awesome_labels=StreamDef(shape=num_classes, mlf=labels_file)))

    reader = MinibatchSource([fd, ld])

    features = C.input_variable(((2 * context + 1) * feature_dim))
    labels = C.input_variable((num_classes))

    model = Sequential(
        [For(range(3), lambda: Recurrence(LSTM(256))),
         Dense(num_classes)])
    z = model(features)
    ce = C.cross_entropy_with_softmax(z, labels)
    errs = C.classification_error(z, labels)

    learner = C.adam_sgd(z.parameters,
                         lr=C.learning_rate_schedule(lr, C.UnitType.sample,
                                                     epoch_size),
                         momentum=C.momentum_as_time_constant_schedule(1000),
                         low_memory=True,
                         gradient_clipping_threshold_per_sample=15,
                         gradient_clipping_with_truncation=True)
    trainer = C.Trainer(z, (ce, errs), learner)

    input_map = {
        features: reader.streams.amazing_features,
        labels: reader.streams.awesome_labels
    }

    pp = C.ProgressPrinter(freq=0)
    # just run and verify it doesn't crash
    for i in range(3):
        mb_data = reader.next_minibatch(mbsize, input_map=input_map)
        trainer.train_minibatch(mb_data)
        pp.update_with_trainer(trainer, with_metric=True)
    assert True
    os.chdir(abs_path)
Example #36
 def create_trainer(use_sparse, device):
     a = C.input_variable(shape=input_shape, is_sparse=use_sparse, name='input')
     w = C.parameter(init=w_init, device=dev)
     z = times(a, w)
 
     l = C.input_variable(shape=label_shape, is_sparse=use_sparse, name='label')
     loss = cross_entropy_with_softmax(z, l, axis=-1)
     trainer = C.Trainer(z, (loss, None), C.sgd(z.parameters, lr=C.learning_rate_schedule(0.007, C.UnitType.sample)))
     return (a, l, w, trainer)
Example #37
def adjust_lr_callback(index, average_error, cv_num_samples, cv_num_minibatches):
    global prev_metric
    if (prev_metric - average_error) / prev_metric < 0.05: # relative gain must reduce metric by at least 5% rel
        learner.reset_learning_rate(C.learning_rate_schedule(learner.learning_rate() / 2, C.learners.UnitType.sample))
        if learner.learning_rate() < lr_per_sample / (2**7-0.1): # we are done after the 6-th LR cut
            print("Learning rate {} too small. Training complete.".format(learner.learning_rate()))
            return False # means we are done
        print("Improvement of metric from {:.3f} to {:.3f} insufficient. Halving learning rate to {}.".format(prev_metric, average_error, learner.learning_rate()))
    prev_metric = average_error
    return True # means continue
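
The callback relies on learner.learning_rate() and learner.reset_learning_rate(); a self-contained toy sketch of those two calls (model and numbers are illustrative only):

import cntk as C

x = C.input_variable(2)
y = C.input_variable(2)
z = C.layers.Dense(2)(x)
learner = C.sgd(z.parameters,
                C.learning_rate_schedule(0.01, C.UnitType.sample))
trainer = C.Trainer(z, (C.squared_error(z, y), None), [learner])

print(learner.learning_rate())    # 0.01
learner.reset_learning_rate(
    C.learning_rate_schedule(learner.learning_rate() / 2, C.UnitType.sample))
print(learner.learning_rate())    # 0.005
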
Example #38
def create_learner(model):
    '''Create the optimized method'''
    lr_per_sample = C.learning_rate_schedule(opt.lr, C.UnitType.minibatch)
    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    if opt.optim == 'sgd':
        return C.sgd(model.parameters, lr=lr_per_sample)
    elif opt.optim == 'adam':
        return C.adam(model.parameters, lr=lr_per_sample, momentum=momentum_time_constant)
    elif opt.optim == 'adagrad':
        return C.adagrad(model.parameters, lr=lr_per_sample)
    else:
        raise RuntimeError("Invalid optim method: " + opt.optim)
Example #39
def create_sample_model(device, writer=None):
    in1 = sequence.input_variable(shape=(input_dim,))
    labels = sequence.input_variable(shape=(input_dim,))
    p = parameter(shape=(input_dim,), init=10, device=device)
    z = plus(in1, reduce_sum(p), name='z')
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)

    lr_per_sample = C.learning_rate_schedule(
        [0.3, 0.2, 0.1, 0.0], C.UnitType.sample)
    learner = C.sgd(z.parameters, lr_per_sample)
    trainer = C.Trainer(z, (ce, errs), [learner], writer)
    return (trainer, in1, labels)
Example #40
def test_factor_dense_for_prediction():

    input_dim = 2
    num_output_classes = 2
    hidden_layer_dim = 50
    num_minibatches_to_train = 2000
    minibatch_size = 25
    learning_rate = 0.5

    input = C.input_variable(input_dim)
    label = C.input_variable(num_output_classes)

    z = _create_model_dense(input, input_dim, hidden_layer_dim, num_output_classes)

    loss = C.cross_entropy_with_softmax(z, label)
    eval_error = C.classification_error(z, label)

    # Instantiate the trainer object to drive the model training

    lr_schedule = C.learning_rate_schedule(learning_rate, C.UnitType.minibatch)
    learner = C.sgd(z.parameters, lr_schedule)
    trainer = C.Trainer(z, (loss, eval_error), [learner])


    # Run the trainer and perform model training
    training_progress_output_freq = 20
    plotdata = {"batchsize":[], "loss":[], "error":[]}


    for i in range(0, int(num_minibatches_to_train)):
        features, labels = _generate_random_data_sample(minibatch_size, input_dim, num_output_classes)
        # Specify the input variables mapping in the model to actual minibatch data for training
        trainer.train_minibatch({input : features, label : labels})
    
    # generate some data to predict
    features, labels = _generate_random_data_sample(10, 2, 2)

    # factor the model.
    newz = nc.factor_dense(z, projection_function=_get_rank_reduced_size, filter_function = _filter)
    original_out = C.softmax(z)
    factored_out = C.softmax(newz)

    original_labels_probs = original_out.eval({input : features})
    predicted_label_probs = factored_out.eval({input : features})
    
    original_prediction_percentage = _percentage_match(labels, original_labels_probs) 

    # The reduced model should have at least a 50% match compared to the original.
    # For the test, we reduced the training minibatches, thus the match is lower.
    assert(original_prediction_percentage * 0.5 <= _percentage_match(labels, predicted_label_probs))
Example #41
def test_htk_deserializers():
    mbsize = 640
    epoch_size = 1000 * mbsize
    lr = [0.001]

    feature_dim = 33
    num_classes = 132
    context = 2

    os.chdir(data_path)

    features_file = "glob_0000.scp"
    labels_file = "glob_0000.mlf"
    label_mapping_file = "state.list"

    fd = HTKFeatureDeserializer(StreamDefs(
        amazing_features = StreamDef(shape=feature_dim, context=(context,context), scp=features_file)))

    ld = HTKMLFDeserializer(label_mapping_file, StreamDefs(
        awesome_labels = StreamDef(shape=num_classes, mlf=labels_file)))

    reader = MinibatchSource([fd,ld])

    features = C.input_variable(((2*context+1)*feature_dim))
    labels = C.input_variable((num_classes))

    model = Sequential([For(range(3), lambda : Recurrence(LSTM(256))),
                        Dense(num_classes)])
    z = model(features)
    ce = C.cross_entropy_with_softmax(z, labels)
    errs = C.classification_error    (z, labels)

    learner = C.adam_sgd(z.parameters,
                    lr=C.learning_rate_schedule(lr, C.UnitType.sample, epoch_size),
                    momentum=C.momentum_as_time_constant_schedule(1000),
                    low_memory=True,
                    gradient_clipping_threshold_per_sample=15, gradient_clipping_with_truncation=True)
    trainer = C.Trainer(z, (ce, errs), learner)

    input_map={ features: reader.streams.amazing_features, labels: reader.streams.awesome_labels }

    pp = C.ProgressPrinter(freq=0)
    # just run and verify it doesn't crash
    for i in range(3):
        mb_data = reader.next_minibatch(mbsize, input_map=input_map)
        trainer.train_minibatch(mb_data)
        pp.update_with_trainer(trainer, with_metric=True)
    assert True
    os.chdir(abs_path)
Example #42
def test_clone_freeze():
    inputs = 3
    outputs = 5

    features = C.input_variable((inputs), np.float32)
    label = C.input_variable((outputs), np.float32)
    weights = C.parameter((inputs, outputs))
    const_weights = C.constant(weights.value)
    z = C.times(features, weights)
    c = C.times(features, const_weights)
    z_clone = z.clone('freeze')
    c_clone = c.clone('freeze')

    # check that z and z_clone are the same
    for p, q in zip(z.parameters, z_clone.constants):
        assert np.array_equal(p.value, q.value)

    # check that c and c_clone are the same
    for p, q in zip(c.constants, c_clone.constants):
        assert np.array_equal(p.value, q.value)

    # keep copies of the old values
    z_copies = [q.value for q in z_clone.constants]
    c_copies = [q.value for q in c_clone.constants]

    # update z
    trainer = C.Trainer(z, C.squared_error(z, label),  C.sgd(z.parameters, C.learning_rate_schedule(1.0, C.UnitType.minibatch)))
    x = np.random.randn(16,3).astype('f')
    y = np.random.randn(16,5).astype('f')
    trainer.train_minibatch({features: x, label: y})
    # update c
    for cc in c.constants:
        cc.value = np.random.randn(*cc.value.shape).astype('f')

    # check that z changed
    for p, q in zip(z.parameters, z_clone.constants):
        assert not np.array_equal(p.value, q.value)

    # check that z_clone did not change
    for p, q in zip(z_copies, z_clone.constants):
        assert np.array_equal(p, q.value)

    # check that c changed
    for p, q in zip(c.constants, c_clone.constants):
        assert not np.array_equal(p.value, q.value)

    # check that c_clone did not change
    for p, q in zip(c_copies, c_clone.constants):
        assert np.array_equal(p, q.value)
Example #43
def train(nonlinearity, num_hidden_layers, device_id,
          minibatch_size=10, num_samples=1000):
    from cntk.cntk_py import always_allow_setting_default_device
    always_allow_setting_default_device()
    C.try_set_default_device(cntk_device(device_id))
    np.random.seed(0)

    learning_rate = 0.5
    lr_schedule = C.learning_rate_schedule(learning_rate, C.UnitType.minibatch)

    hidden_layers_dim = 50

    inp = C.input_variable((input_dim), np.float32)
    label = C.input_variable((num_output_classes), np.float32)

    z = fully_connected_classifier_net(inp, num_output_classes, hidden_layers_dim,
                                       num_hidden_layers, nonlinearity)

    loss = C.cross_entropy_with_softmax(z, label)
    eval_error = C.classification_error(z, label)

    learner = C.sgd(z.parameters, lr_schedule)
    trainer = C.Trainer(z, (loss, eval_error), [learner])

    num_minibatches_to_train = int(num_samples / minibatch_size)

    training_progress_output_freq = 20

    losses = []
    errors = []

    for i in range(num_minibatches_to_train):
        features, labels = generate_random_data_sample(minibatch_size,
                                                       input_dim,
                                                       num_output_classes)

        # Specify the input variables mapping in the model to actual minibatch
        # data for training.
        trainer.train_minibatch({inp: features, label: labels},
                                device=cntk_device(device_id))

        batchsize, loss, error = print_training_progress(trainer, i,
                                                         training_progress_output_freq)

        if not (loss == "NA" or error == "NA"):
            losses.append(loss)
            errors.append(error)

    return losses, errors
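
Example #43 assumes generate_random_data_sample and fully_connected_classifier_net from the CNTK feed-forward tutorial; sketches of both, matching how they are called above:

import numpy as np
import cntk as C

def generate_random_data_sample(sample_size, feature_dim, num_classes):
    # labels 0..num_classes-1, features drawn around class-dependent means
    Y = np.random.randint(size=(sample_size, 1), low=0, high=num_classes)
    X = (np.random.randn(sample_size, feature_dim) + 3) * (Y + 1)
    X = X.astype(np.float32)
    # one-hot encode the labels
    class_ind = [Y == class_number for class_number in range(num_classes)]
    Y = np.asarray(np.hstack(class_ind), dtype=np.float32)
    return X, Y

def fully_connected_classifier_net(inp, num_output_classes, hidden_layer_dim,
                                   num_hidden_layers, nonlinearity):
    h = inp
    for _ in range(num_hidden_layers):
        h = C.layers.Dense(hidden_layer_dim, activation=nonlinearity)(h)
    return C.layers.Dense(num_output_classes, activation=None)(h)
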
Example #44
def test_data_resize():
    batch_size = 8
    w = C.parameter(shape=(3, 2), name='w1')
    x = C.input_variable(shape=[3], name='x')
    y = C.softmax(C.times(x, w))
    y = C.unpack_batch(y)
    y = C.reshape(y, [batch_size * 2])
    loss = C.reduce_mean(-C.log(y))

    learning_rate = 0.01
    lr_schedule = C.learning_rate_schedule(learning_rate, C.UnitType.minibatch)
    learner = C.sgd(y.parameters, lr_schedule, gradient_clipping_threshold_per_sample=1.0)
    trainer = C.Trainer(y, (loss), [learner])

    features = np.random.randn(batch_size, 3)
    trainer.train_minibatch({x: features})
Example #45
 def create_distributed_learner(self, mode, config):
     local_learner = C.sgd(self.z.parameters, C.learning_rate_schedule(0.01, unit=C.learners.UnitType.sample))
     try:
         if mode == 'data_parallel':
             if config is None:
                 config = DataParallelConfig(num_quantization_bits=32, distributed_after=0)
             learner = C.data_parallel_distributed_learner(local_learner, num_quantization_bits=config.num_quantization_bits, distributed_after=config.distributed_after)
         elif mode == 'block_momentum':
             if config is None:
                 # the default config to match data parallel SGD
                 config = BlockMomentumConfig(block_momentum_as_time_constant=0, block_learning_rate=1, block_size=NUM_WORKERS, distributed_after=0)
             learner = C.block_momentum_distributed_learner(local_learner, block_momentum_as_time_constant=config.block_momentum_as_time_constant, block_learning_rate=config.block_learning_rate, block_size=config.block_size, distributed_after=config.distributed_after)
         else:
             learner = local_learner
     except RuntimeError:
         learner = None
     return learner
Example #46
    def create_trainer(use_sparse, device):
        a = C.input_variable(shape=input_shape, is_sparse=use_sparse, name='input')
        w_i = C.parameter(init=w_init_i, device=dev)
        a_projection = times(a, w_i)

        p_o = C.placeholder_variable()
        h = C.past_value(p_o)
        w_h = C.parameter(init=w_init_h, device=dev)
        h_projection = times(h, w_h)        
        z = a_projection + h_projection
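        # Substituting z for the single placeholder (p_o) below closes the recurrence loop.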
        z = z.replace_placeholder(z)
        z = reshape(z, label_shape)

        l = C.input_variable(shape=label_shape, is_sparse=use_sparse, name='label')
        loss = cross_entropy_with_softmax(z, l, axis=-1)
        trainer = C.Trainer(z, (loss, None), C.sgd(z.parameters, lr=C.learning_rate_schedule(0.007, C.UnitType.sample)))
        return (a, l, w_i, w_h, trainer)
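The inner create_trainer above also relies on names from its enclosing test scope (input_shape, label_shape, w_init_i, w_init_h), and the unprefixed times, reshape and cross_entropy_with_softmax are assumed to be imported from cntk.ops. A minimal sketch of that setup with illustrative shapes only (an assumption; the original test's values may differ):

from cntk.ops import times, reshape, cross_entropy_with_softmax

input_dim, label_dim = 3, 2
input_shape = (input_dim,)
label_shape = (label_dim,)
w_init_i = np.random.rand(input_dim, label_dim).astype(np.float32)  # input projection
w_init_h = np.random.rand(label_dim, label_dim).astype(np.float32)  # recurrent projection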
def create_trainer(network, epoch_size, num_quantization_bits):
    # Set learning parameters
    lr_per_mb         = [0.01]*20 + [0.001]*20 + [0.0001]*20 + [0.00001]*10 + [0.000001]
    lr_schedule       = cntk.learning_rate_schedule(lr_per_mb, unit=cntk.learner.UnitType.minibatch, epoch_size=epoch_size)
    mm_schedule       = cntk.learner.momentum_schedule(0.9)
    l2_reg_weight     = 0.0005 # CNTK L2 regularization is per sample, thus same as Caffe
    
    # Create learner
    local_learner = cntk.learner.momentum_sgd(network['output'].parameters, lr_schedule, mm_schedule, unit_gain=False, l2_regularization_weight=l2_reg_weight)
    # Since we reuse parameter settings (learning rate, momentum) from Caffe, we set unit_gain to False to ensure consistency 
    parameter_learner = data_parallel_distributed_learner(
        local_learner, 
        num_quantization_bits=num_quantization_bits,
        distributed_after=0)

    # Create trainer
    return cntk.Trainer(network['output'], (network['ce'], network['pe']), parameter_learner)
Example #48
0
def test_usermbsource_training(tmpdir, with_checkpoint_impl):
    input_dim = 1000
    num_output_classes = 5

    mbs = MyDataSource(input_dim, num_output_classes)
    # Using this for testing the UserMinibatchSource checkpointing
    if with_checkpoint_impl:
        MBS_CV_CLASS = MyDataSourceWithCheckpoint
    else:
        MBS_CV_CLASS = MyDataSource

    mbs_cv = MBS_CV_CLASS(input_dim, num_output_classes)

    from cntk import sequence, parameter, plus, cross_entropy_with_softmax, \
            classification_error, learning_rate_schedule, sgd, Trainer, \
            training_session, times, UnitType

    feature = sequence.input_variable(shape=(input_dim,))
    label = C.input_variable(shape=(num_output_classes,))
    p = parameter(shape=(input_dim, num_output_classes), init=10)
    z = times(sequence.reduce_sum(feature), p, name='z')
    ce = cross_entropy_with_softmax(z, label)
    errs = classification_error(z, label)

    lr_per_sample = learning_rate_schedule(
        [0.3, 0.2, 0.1, 0.0], UnitType.sample)
    learner = sgd(z.parameters, lr_per_sample)
    trainer = Trainer(z, (ce, errs), [learner])
    input_map = {
        feature: mbs.fsi,
        label: mbs.lsi
    }

    session = training_session(
        trainer=trainer, mb_source=mbs,
        model_inputs_to_streams=input_map,
        mb_size=4, max_samples=20,
        cv_config=C.CrossValidationConfig(minibatch_source=mbs_cv, max_samples=10,
            minibatch_size=2)
    )
    session.train()

    assert trainer.total_number_of_samples_seen == 20
    if with_checkpoint_impl:
        assert mbs_cv._restore_from_checkpoint_calls == 1
def train_sequence_classifier():
    input_dim = 2000
    hidden_dim = 25
    embedding_dim = 50
    num_classes = 5

    # Input variables denoting the features and label data
    features = C.sequence.input_variable(shape=input_dim, is_sparse=True)
    label = C.input_variable(num_classes)

    # Instantiate the sequence classification model
    classifier_output = lstm_sequence_classifier(features, num_classes, embedding_dim, hidden_dim)

    ce = C.cross_entropy_with_softmax(classifier_output, label)
    pe = C.classification_error(classifier_output, label)

    rel_path = r"../../../../Tests/EndToEndTests/Text/SequenceClassification/Data/Train.ctf"
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path)

    reader = create_reader(path, True, input_dim, num_classes)

    input_map = {
        features : reader.streams.features,
        label    : reader.streams.labels
    }

    lr_per_sample = C.learning_rate_schedule(0.1, C.UnitType.sample)

    # Instantiate the trainer object to drive the model training
    progress_printer = C.logging.ProgressPrinter(0)
    trainer = C.Trainer(classifier_output, (ce, pe),
                        C.sgd(classifier_output.parameters, lr=lr_per_sample),
                        progress_printer)

    # Get minibatches of sequences to train with and perform model training
    minibatch_size = 200

    for i in range(251):
        mb = reader.next_minibatch(minibatch_size, input_map=input_map)
        trainer.train_minibatch(mb)

    evaluation_average = copy.copy(trainer.previous_minibatch_evaluation_average)
    loss_average = copy.copy(trainer.previous_minibatch_loss_average)

    return evaluation_average, loss_average
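The lstm_sequence_classifier model function and the create_reader helper used above are defined elsewhere in the original script. A minimal sketch of plausible definitions (an assumption; the CTF field names 'x'/'y' and the layer stack are illustrative):

def lstm_sequence_classifier(features, num_classes, embedding_dim, hidden_dim):
    # Embed the sparse one-hot words, run an LSTM over the sequence,
    # keep the last hidden state and classify it.
    model = C.layers.Sequential([
        C.layers.Embedding(embedding_dim),
        C.layers.Recurrence(C.layers.LSTM(hidden_dim)),
        C.sequence.last,
        C.layers.Dense(num_classes)])
    return model(features)

def create_reader(path, is_training, input_dim, num_classes):
    return C.io.MinibatchSource(
        C.io.CTFDeserializer(path, C.io.StreamDefs(
            features=C.io.StreamDef(field='x', shape=input_dim, is_sparse=True),
            labels=C.io.StreamDef(field='y', shape=num_classes, is_sparse=False))),
        randomize=is_training,
        max_sweeps=C.io.INFINITELY_REPEAT if is_training else 1)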
Example #50
0
def test_ext_lambdafunc(tmpdir):
    dim = 4

    class CallbackCounter(object):
        def __init__(self):
            self.count = 0

        def inc(self, arg):
            self.count += 1

    cb = CallbackCounter()

    p = C.parameter(shape=(dim,), init=1)
    i = C.input_variable(dim, needs_gradient=True, name='i_var')
    k = i * p
    m = LambdaFunc(k,
                   when=lambda arg: np.sum(arg) > 1,
                   execute=cb.inc)
    m = C.user_function(m)
    z0 = m + 0

    filepath = str(tmpdir / 'test_ext_lambdafunc.dat')
    z0.save(filepath)

    Function.register_udf_deserialize_callback('conditional_exec_lambda',
                                               lambda x, *unused: LambdaFunc(x, when=lambda arg: np.sum(arg) > 1, execute=cb.inc))

    z = Function.load(filepath)

    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = C.learning_rate_schedule(0.007, C.UnitType.sample)
    trainer = C.Trainer(z, (z + 0, z + 0), [C.momentum_sgd(z.parameters,
                                                           lr_per_sample,
                                                           momentum_time_constant,
                                                           True)])

    i = 0
    input_data = 0.1 * np.ones(dim)
    trainer.train_minibatch([input_data])
    assert cb.count == 0

    input_data = 0.3 * np.ones(dim)
    trainer.train_minibatch([input_data])
    assert cb.count == 1
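The LambdaFunc user function exercised above is defined in the surrounding test helpers. A sketch of what it might look like (an assumption; the original may differ in detail), using the simplified single-input UserFunction signatures:

from cntk.ops.functions import UserFunction

class LambdaFunc(UserFunction):
    def __init__(self, arg, when=lambda arg: True, execute=lambda arg: None, name=''):
        self.when = when
        self.execute = execute
        super(LambdaFunc, self).__init__([arg], name=name)

    @property
    def op_name(self):
        # Must match the name passed to register_udf_deserialize_callback above.
        return 'conditional_exec_lambda'

    def infer_outputs(self):
        # The function is a pass-through: same shape, dtype and dynamic axes as its input.
        return [C.output_variable(self.inputs[0].shape, self.inputs[0].dtype,
                                  self.inputs[0].dynamic_axes)]

    def forward(self, argument, device=None, outputs_to_retain=None):
        # Run the side-effect callback when the predicate fires, then pass the data through.
        if self.when(argument):
            self.execute(argument)
        return None, argument

    def backward(self, state, root_gradients):
        # Identity gradient.
        return root_gradients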
Example #51
0
def test_ext_train(tmpdir):
    dim = 4

    p = C.parameter(shape=(dim,), init=10)
    i = C.sequence.input_variable(dim, needs_gradient=True, name='i_var')
    m = MyPlus(i, C.constant(3), 'my_plus')
    # keeping m unwrapped since we need to access its member variables
    z = C.user_function(m) + p

    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = C.learning_rate_schedule(0.007, C.UnitType.sample)
    trainer = C.Trainer(z, (z + 0, z + 0),
                        [C.momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant,
                                        True)])

    i = 0
    while i < 100:
        i += 1
        input_data = np.random.rand(dim)
        trainer.train_minibatch([input_data])

    assert m.forward_calls == m.backward_calls == 100

    filepath = str(tmpdir / 'test_ext_train.dat')

    z.save(filepath)

    with open(filepath, 'rb') as f:
        buf = f.read()

    # this is only needed for Python 2.7
    # (which does not distinguish between bytes and strings)
    if isinstance(buf, str):
        buf = bytearray(buf)

    z1 = Function.load(buf)

    m1 = z1.find_by_name('my_plus')
    # m1 is an instance of UserFunction, cannot directly downcast it to MyPlus,
    # using serialize as workaround:
    state = m1.serialize()['state']

    assert state['forward_calls'] == state['backward_calls'] == 100
Example #52
0
def test_restore_constants(tmpdir):
    C.device.try_set_default_device(C.device.cpu())
    def _setvalue(x, v):
        x.value = 0 * x.value + v if len(x.shape) > 0 else np.array(v, dtype=np.float32)

    def _setall(f, v):
        for x in f.constants + f.parameters:
            _setvalue(x, v)

    def _checkall(f, v):
        for x in f.constants + f.parameters:
            assert (x.value == v).all()

    x = C.input_variable(10)
    f = C.layers.BatchNormalization()(x)
    trainer = C.Trainer(f, C.reduce_sum(f), C.sgd(f.parameters, C.learning_rate_schedule(0.1, 'sample')))

    model_filename = str(tmpdir / 'function.out')
    checkpoint_filename = str(tmpdir / 'checkpoint.out')
    _setall(f, 1)
    f.save(model_filename)
    _checkall(f, 1)

    _setall(f, 2)
    trainer.save_checkpoint(checkpoint_filename)
    _checkall(f, 2)

    _setall(f, 3)
    _checkall(f, 3)
    trainer.restore_from_checkpoint(checkpoint_filename)
    _checkall(f, 2)

    f2 = C.Function.load(model_filename)
    _checkall(f2, 1)

    _setall(f, 4)
    _checkall(f, 4)
    f.restore(model_filename)
    _checkall(f, 1)

    _setall(f2, 5)
    _checkall(f2, 5)
Example #53
0
def _train_backcompatible_test(z, loss, eval_error,
           f_input, l_input,
           num_output_classes,
           steps):
    np.random.seed(0)

    input_dim = 2

    lr_schedule = learning_rate_schedule(0.5, UnitType.minibatch)

    learner = sgd(z.parameters, lr_schedule)
    trainer = Trainer(z, (loss, eval_error), [learner])

    minibatch_size = 10

    for i in range(steps):
        features, labels = _generate_random_data_sample(
            minibatch_size, input_dim, num_output_classes)

        trainer.train_minibatch({f_input: features, l_input: labels})
Example #54
0
def test_udf_checkpointing(tmpdir):
    dev, w_value, c1_value, c2_value, op = build_test_function()

    label = C.constant(np.asarray([[1, 2], [3, 4]]).astype(np.float32))

    loss = C.cross_entropy_with_softmax(op, label)
    eval_error = C.classification_error(op, label)

    lr_schedule = C.learning_rate_schedule(0.5, C.UnitType.minibatch)
    learner = C.sgd(op.parameters, lr_schedule)
    trainer = C.Trainer(op, (loss, eval_error), [learner])

    trainer.train_minibatch({op.arguments[0]: np.random.random((2, 2)).astype(np.float32)}, device=dev)

    filepath = str(tmpdir / 'test_checkpointing.out')

    trainer.save_checkpoint(filepath, external_state={'test': 'test'})

    d = C.cntk_py.Dictionary.load(filepath)
    assert len(d.keys()) != 0
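    # Hedged follow-up sketch (an assumption, not part of the original test): the same
    # checkpoint can be restored into the trainer to resume training with the
    # user-defined function's state intact.
    trainer.restore_from_checkpoint(filepath)
    trainer.train_minibatch({op.arguments[0]: np.random.random((2, 2)).astype(np.float32)},
                            device=dev)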
Example #55
0
def run_distributed_training(tmpdir, create_func):

    in1 = sequence.input_variable(shape=1)
    labels = sequence.input_variable(shape=1)
    p = parameter(shape=2, init=10)
    z = plus(in1, reduce_sum(p), name='z')
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)

    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = C.learning_rate_schedule(0.007, C.UnitType.sample)
    dist_learner = create_func(C.momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant, True))

    communicator = dist_learner.communicator()
    workers = communicator.workers()
    current_worker = communicator.current_worker()
    found_rank = False
    for wk in workers:
        if current_worker.global_rank == wk.global_rank:
            found_rank = True

    assert found_rank

    trainer = C.Trainer(z, (ce, errs), [ dist_learner ])
    in1_value = [[1],[2]]
    label_value = [[0], [1]]
    arguments = {in1: in1_value, labels: label_value}
    z_output = z.output
    updated, var_map = trainer.train_minibatch(arguments, outputs=[z_output])
    
    p = str(tmpdir / 'checkpoint.dat')
    trainer.save_checkpoint(p)
    trainer.restore_from_checkpoint(p)

    communicator.barrier()

    assert trainer.model.name == 'z'

    # Ensure that Swig is not leaking raw types
    assert isinstance(trainer.model, Function)
    assert trainer.model.__doc__
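A hedged usage sketch for the helper above (assumptions: it is run under mpiexec, and a pathlib temporary directory stands in for the pytest tmpdir fixture):

import tempfile
from pathlib import Path

tmpdir = Path(tempfile.mkdtemp())
run_distributed_training(tmpdir, lambda local_learner:
    C.data_parallel_distributed_learner(local_learner,
                                        num_quantization_bits=32,
                                        distributed_after=0))
C.distributed.Communicator.finalize()  # required before exit in distributed runs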
def create_trainer(network, epoch_size, num_quantization_bits, printer, block_size, warm_up):
    # Set learning parameters
    lr_per_mb         = [0.01]*25 + [0.001]*25 + [0.0001]*25 + [0.00001]*25 + [0.000001]
    lr_schedule       = C.learning_rate_schedule(lr_per_mb, unit=C.learners.UnitType.minibatch, epoch_size=epoch_size)
    mm_schedule       = C.learners.momentum_schedule(0.9)
    l2_reg_weight     = 0.0005 # CNTK L2 regularization is per sample, thus same as Caffe

    if block_size is not None and num_quantization_bits != 32:
        raise RuntimeError("Block momentum cannot be used with quantization; please remove the quantized_bits option.")

    # Create learner
    local_learner = C.learners.momentum_sgd(network['output'].parameters, lr_schedule, mm_schedule, unit_gain=False, l2_regularization_weight=l2_reg_weight)
    # Since we reuse parameter settings (learning rate, momentum) from Caffe, we set unit_gain to False to ensure consistency

    # Create trainer
    if block_size is not None:
        parameter_learner = block_momentum_distributed_learner(local_learner, block_size=block_size)
    else:
        parameter_learner = data_parallel_distributed_learner(local_learner, num_quantization_bits=num_quantization_bits, distributed_after=warm_up)

    return C.Trainer(network['output'], (network['ce'], network['pe']), parameter_learner, printer)
def convnetlrn_cifar10_dataaug(reader_train, reader_test, epoch_size=50000, max_epochs = 80):
    _cntk_py.set_computation_network_trace_level(1)

    # Input variables denoting the features and label data
    input_var = cntk.ops.input_variable((num_channels, image_height, image_width))
    label_var = cntk.ops.input_variable((num_classes))

    # apply model to input
    scaled_input = cntk.ops.element_times(cntk.ops.constant(0.00390625), input_var)

    with cntk.layers.default_options(activation=cntk.ops.relu, pad=True):
        z = cntk.models.Sequential([
            cntk.models.For(range(2), lambda: [
                cntk.layers.Convolution2D((3,3), 64),
                cntk.layers.Convolution2D((3,3), 64),
                LocalResponseNormalization(1.0, 4, 0.001, 0.75),
                cntk.layers.MaxPooling((3,3), (2,2))
            ]),
            cntk.models.For(range(2), lambda i: [
                cntk.layers.Dense([256,128][i]),
                cntk.layers.Dropout(0.5)
            ]),
            cntk.layers.Dense(num_classes, activation=None)
        ])(scaled_input)

    # loss and metric
    ce = cntk.ops.cross_entropy_with_softmax(z, label_var)
    pe = cntk.ops.classification_error(z, label_var)

    # training config
    minibatch_size = 64

    # Set learning parameters
    lr_per_sample          = [0.0015625]*20 + [0.00046875]*20 + [0.00015625]*20 + [0.000046875]*10 + [0.000015625]
    lr_schedule            = cntk.learning_rate_schedule(lr_per_sample, unit=cntk.learner.UnitType.sample, epoch_size=epoch_size)
    mm_time_constant       = [0]*20 + [600]*20 + [1200]
    mm_schedule            = cntk.learner.momentum_as_time_constant_schedule(mm_time_constant, epoch_size=epoch_size)
    l2_reg_weight          = 0.002

    # trainer object
    learner = cntk.learner.momentum_sgd(z.parameters, lr_schedule, mm_schedule,
                                        unit_gain = True,
                                        l2_regularization_weight = l2_reg_weight)
    trainer = cntk.Trainer(z, (ce, pe), learner)

    # define mapping from reader streams to network inputs
    input_map = {
        input_var: reader_train.streams.features,
        label_var: reader_train.streams.labels
    }

    cntk.utils.log_number_of_parameters(z); print()
    progress_printer = cntk.utils.ProgressPrinter(tag='Training', num_epochs=max_epochs)

    # perform model training
    for epoch in range(max_epochs):       # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = reader_train.next_minibatch(min(minibatch_size, epoch_size-sample_count), input_map=input_map) # fetch minibatch.
            trainer.train_minibatch(data)                                   # update model with it
            sample_count += trainer.previous_minibatch_sample_count         # count samples processed so far
            progress_printer.update_with_trainer(trainer, with_metric=True) # log progress

        progress_printer.epoch_summary(with_metric=True)
        z.save(os.path.join(model_path, "ConvNet_CIFAR10_DataAug_{}.dnn".format(epoch)))

    ### Evaluation action
    epoch_size     = 10000
    minibatch_size = 16

    # process minibatches and evaluate the model
    metric_numer    = 0
    metric_denom    = 0
    sample_count    = 0
    minibatch_index = 0

    while sample_count < epoch_size:
        current_minibatch = min(minibatch_size, epoch_size - sample_count)
        # Fetch the next test minibatch.
        data = reader_test.next_minibatch(current_minibatch, input_map=input_map)
        # Evaluate the model on this minibatch.
        metric_numer += trainer.test_minibatch(data) * current_minibatch
        metric_denom += current_minibatch
        # Keep track of the number of samples processed so far.
        sample_count += data[label_var].num_samples
        minibatch_index += 1

    print("")
    print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format(minibatch_index+1, (metric_numer*100.0)/metric_denom, metric_denom))
    print("")

    return metric_numer/metric_denom
Example #58
0
def convnet_mnist(debug_output=False, epoch_size=60000, minibatch_size=64, max_epochs=40):
    image_height = 28
    image_width  = 28
    num_channels = 1
    input_dim = image_height * image_width * num_channels
    num_output_classes = 10

    # Input variables denoting the features and label data
    input_var = C.ops.input_variable((num_channels, image_height, image_width), np.float32)
    label_var = C.ops.input_variable(num_output_classes, np.float32)

    # Instantiate the feedforward classification model
    scaled_input = C.ops.element_times(C.ops.constant(0.00390625), input_var)

    with C.layers.default_options(activation=C.ops.relu, pad=False):
        conv1 = C.layers.Convolution2D((5,5), 32, pad=True)(scaled_input)
        pool1 = C.layers.MaxPooling((3,3), (2,2))(conv1)
        conv2 = C.layers.Convolution2D((3,3), 48)(pool1)
        pool2 = C.layers.MaxPooling((3,3), (2,2))(conv2)
        conv3 = C.layers.Convolution2D((3,3), 64)(pool2)
        f4    = C.layers.Dense(96)(conv3)
        drop4 = C.layers.Dropout(0.5)(f4)
        z     = C.layers.Dense(num_output_classes, activation=None)(drop4)

    ce = C.losses.cross_entropy_with_softmax(z, label_var)
    pe = C.metrics.classification_error(z, label_var)

    reader_train = create_reader(os.path.join(data_path, 'Train-28x28_cntk_text.txt'), True, input_dim, num_output_classes)

    # Set learning parameters
    lr_per_sample    = [0.001]*10 + [0.0005]*10 + [0.0001]
    lr_schedule      = C.learning_rate_schedule(lr_per_sample, C.learners.UnitType.sample, epoch_size)
    mm_time_constant = [0]*5 + [1024]
    mm_schedule      = C.learners.momentum_as_time_constant_schedule(mm_time_constant, epoch_size)

    # Instantiate the trainer object to drive the model training
    learner = C.learners.momentum_sgd(z.parameters, lr_schedule, mm_schedule)
    progress_printer = C.logging.ProgressPrinter(tag='Training', num_epochs=max_epochs)
    trainer = C.Trainer(z, (ce, pe), learner, progress_printer)

    # define mapping from reader streams to network inputs
    input_map = {
        input_var : reader_train.streams.features,
        label_var : reader_train.streams.labels
    }

    C.logging.log_number_of_parameters(z); print()

    # Get minibatches of images to train with and perform model training
    for epoch in range(max_epochs):       # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = reader_train.next_minibatch(min(minibatch_size, epoch_size - sample_count), input_map=input_map) # fetch minibatch.
            trainer.train_minibatch(data)                                   # update model with it
            sample_count += data[label_var].num_samples                     # count samples processed so far

        trainer.summarize_training_progress()
        z.save(os.path.join(model_path, "ConvNet_MNIST_{}.dnn".format(epoch)))
    
    # Load test data
    reader_test = create_reader(os.path.join(data_path, 'Test-28x28_cntk_text.txt'), False, input_dim, num_output_classes)

    input_map = {
        input_var : reader_test.streams.features,
        label_var : reader_test.streams.labels
    }

    # Test data for trained model
    epoch_size = 10000
    minibatch_size = 1024

    # process minibatches and evaluate the model
    metric_numer    = 0
    metric_denom    = 0
    sample_count    = 0
    minibatch_index = 0

    while sample_count < epoch_size:
        current_minibatch = min(minibatch_size, epoch_size - sample_count)

        # Fetch the next test minibatch.
        data = reader_test.next_minibatch(current_minibatch, input_map=input_map)

        # Evaluate the model on this minibatch.
        metric_numer += trainer.test_minibatch(data) * current_minibatch
        metric_denom += current_minibatch

        # Keep track of the number of samples processed so far.
        sample_count += data[label_var].num_samples
        minibatch_index += 1

    print("")
    print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format(minibatch_index+1, (metric_numer*100.0)/metric_denom, metric_denom))
    print("")

    return metric_numer/metric_denom
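A hedged usage sketch (assumes the surrounding script defines data_path, model_path and create_reader, and that the standard MNIST CTF text files are present):

error_rate = convnet_mnist(max_epochs=1)
print("Test error: {:.2f}%".format(error_rate * 100))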
def deconv_mnist(max_epochs=3):
    image_height = 28
    image_width  = 28
    num_channels = 1
    input_dim = image_height * image_width * num_channels
    num_output_classes = 10

    # Input variable and normalization
    input_var = cntk.ops.input_variable((num_channels, image_height, image_width), np.float32)
    scaled_input = cntk.ops.element_times(cntk.ops.constant(0.00390625), input_var)

    # Define the auto encoder model
    cMap = 1
    conv1   = cntk.layers.Convolution2D  ((5,5), cMap, pad=True, activation=cntk.ops.relu)(scaled_input)
    pool1   = cntk.layers.MaxPooling   ((4,4), (4,4))(conv1)
    unpool1 = cntk.layers.MaxUnpooling ((4,4), (4,4))(pool1, conv1)
    z       = cntk.layers.ConvolutionTranspose2D((5,5), num_channels, pad=True, bias=False, init=cntk.glorot_uniform(0.001))(unpool1)

    # define rmse loss function (should be 'err = cntk.ops.minus(deconv1, scaled_input)')
    f2        = cntk.ops.element_times(cntk.ops.constant(0.00390625), input_var)
    err       = cntk.ops.reshape(cntk.ops.minus(z, f2), (784))
    sq_err    = cntk.ops.element_times(err, err)
    mse       = cntk.ops.reduce_mean(sq_err)
    rmse_loss = cntk.ops.sqrt(mse)
    rmse_eval = cntk.ops.sqrt(mse)

    reader_train = create_reader(os.path.join(data_path, 'Train-28x28_cntk_text.txt'), True, input_dim, num_output_classes)

    # training config
    epoch_size = 60000
    minibatch_size = 64

    # Set learning parameters
    lr_schedule = cntk.learning_rate_schedule([0.00015], cntk.learner.UnitType.sample, epoch_size)
    mm_schedule = cntk.learner.momentum_as_time_constant_schedule([600], epoch_size)

    # Instantiate the trainer object to drive the model training
    learner = cntk.learner.momentum_sgd(z.parameters, lr_schedule, mm_schedule, unit_gain=True)
    progress_printer = cntk.utils.ProgressPrinter(tag='Training')
    trainer = cntk.Trainer(z, (rmse_loss, rmse_eval), learner, progress_printer)

    # define mapping from reader streams to network inputs
    input_map = {
        input_var : reader_train.streams.features
    }

    cntk.utils.log_number_of_parameters(z); print()

    # Get minibatches of images to train with and perform model training
    for epoch in range(max_epochs):       # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = reader_train.next_minibatch(min(minibatch_size, epoch_size - sample_count), input_map=input_map) # fetch minibatch.
            trainer.train_minibatch(data)                                   # update model with it
            sample_count += data[input_var].num_samples                     # count samples processed so far

        trainer.summarize_training_progress()
        z.save(os.path.join(model_path, "07_Deconvolution_PY_{}.model".format(epoch)))

    # rename final model
    last_model_name = os.path.join(model_path, "07_Deconvolution_PY_{}.model".format(max_epochs - 1))
    final_model_name = os.path.join(model_path, "07_Deconvolution_PY.model")
    try:
        os.remove(final_model_name)
    except OSError:
        pass
    os.rename(last_model_name, final_model_name)
    
    # Load test data
    reader_test = create_reader(os.path.join(data_path, 'Test-28x28_cntk_text.txt'), False, input_dim, num_output_classes)

    input_map = {
        input_var : reader_test.streams.features
    }

    # Test data for trained model
    epoch_size = 10000
    minibatch_size = 1024

    # process minibatches and evaluate the model
    metric_numer    = 0
    metric_denom    = 0
    sample_count    = 0
    minibatch_index = 0

    while sample_count < epoch_size:
        current_minibatch = min(minibatch_size, epoch_size - sample_count)

        # Fetch the next test minibatch.
        data = reader_test.next_minibatch(current_minibatch, input_map=input_map)

        # Evaluate the model on this minibatch.
        metric_numer += trainer.test_minibatch(data) * current_minibatch
        metric_denom += current_minibatch

        # Keep track of the number of samples processed so far.
        sample_count += data[input_var].num_samples
        minibatch_index += 1

    print("")
    print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format(minibatch_index+1, (metric_numer*100.0)/metric_denom, metric_denom))
    print("")

    return metric_numer/metric_denom
Example #60
0
def conv3d_ucf11(train_reader, test_reader, max_epochs=30):
    # Replace 0 with 1 to get detailed log.
    set_computation_network_trace_level(0)

    # These values must match for both train and test reader.
    image_height       = train_reader.height
    image_width        = train_reader.width
    num_channels       = train_reader.channel_count
    sequence_length    = train_reader.sequence_length
    num_output_classes = train_reader.label_count

    # Input variables denoting the features and label data
    input_var = C.input_variable((num_channels, sequence_length, image_height, image_width), np.float32)
    label_var = C.input_variable(num_output_classes, np.float32)

    # Instantiate simple 3D Convolution network inspired by VGG network 
    # and http://vlg.cs.dartmouth.edu/c3d/c3d_video.pdf
    with C.default_options(activation=C.relu):
        z = C.layers.Sequential([
            C.layers.Convolution3D((3,3,3), 64, pad=True),
            C.layers.MaxPooling((1,2,2), (1,2,2)),
            C.layers.For(range(3), lambda i: [
                C.layers.Convolution3D((3,3,3), [96, 128, 128][i], pad=True),
                C.layers.Convolution3D((3,3,3), [96, 128, 128][i], pad=True),
                C.layers.MaxPooling((2,2,2), (2,2,2))
            ]),
            C.layers.For(range(2), lambda: [
                C.layers.Dense(1024), 
                C.layers.Dropout(0.5)
            ]),
            C.layers.Dense(num_output_classes, activation=None)
        ])(input_var)
    
    # loss and classification error.
    ce = C.cross_entropy_with_softmax(z, label_var)
    pe = C.classification_error(z, label_var)

    # training config
    train_epoch_size     = train_reader.size()
    train_minibatch_size = 2

    # Set learning parameters
    lr_per_sample          = [0.01]*10+[0.001]*10+[0.0001]
    lr_schedule            = C.learning_rate_schedule(lr_per_sample, epoch_size=train_epoch_size, unit=C.UnitType.sample)
    momentum_time_constant = 4096
    mm_schedule            = C.momentum_as_time_constant_schedule([momentum_time_constant])

    # Instantiate the trainer object to drive the model training
    learner = C.momentum_sgd(z.parameters, lr_schedule, mm_schedule, True)
    progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)
    trainer = C.Trainer(z, (ce, pe), learner, progress_printer)

    log_number_of_parameters(z); print()

    # Get minibatches of images to train with and perform model training
    for epoch in range(max_epochs):       # loop over epochs
        train_reader.reset()

        while train_reader.has_more():
            videos, labels, current_minibatch = train_reader.next_minibatch(train_minibatch_size)
            trainer.train_minibatch({input_var : videos, label_var : labels})

        trainer.summarize_training_progress()

    # Test data for trained model
    epoch_size     = test_reader.size()
    test_minibatch_size = 2

    # process minibatches and evaluate the model
    metric_numer    = 0
    metric_denom    = 0
    minibatch_index = 0

    test_reader.reset()    
    while test_reader.has_more():
        videos, labels, current_minibatch = test_reader.next_minibatch(test_minibatch_size)
        # Evaluate the model on this minibatch.
        metric_numer += trainer.test_minibatch({input_var : videos, label_var : labels}) * current_minibatch
        metric_denom += current_minibatch
        # Keep track of the number of samples processed so far.
        minibatch_index += 1

    print("")
    print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format(minibatch_index+1, (metric_numer*100.0)/metric_denom, metric_denom))
    print("")

    return metric_numer/metric_denom