def test_epochsize_wrn_for_parameter_schedule():
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always")

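        # passing epoch_size together with a constant (scalar) rate should trigger a RuntimeWarning mentioning "epoch_size"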
        C.learning_parameter_schedule(0.01, minibatch_size=1, epoch_size=1000)

        assert len(w) == 1
        assert issubclass(w[-1].category, RuntimeWarning)
        assert "epoch_size" in str(w[-1].message)
Example #2
def test_ext_backpropstate(payload):

    class TestBackPropState(UserFunction):
        def __init__(self, arg, payload, name='f1'):
            self.payload = payload
            super(TestBackPropState, self).__init__([arg])

        def infer_outputs(self):
            return [C.output_variable(self.inputs[0].shape, self.inputs[0].dtype, self.inputs[0].dynamic_axes)]

        def forward(self, argument, device=None, outputs_to_retain=None):
            return self.payload, argument

        def backward(self, state, root_gradients):
            assert state == self.payload
            return root_gradients

    dim = 4

    p = C.parameter(shape=(dim,), init=10)
    in1 = C.input_variable(dim, needs_gradient=True, name='i_var')
    m = C.user_function(TestBackPropState(in1, payload))
    z = m + p

    lr_per_sample = C.learning_parameter_schedule(0.007, minibatch_size=1)
    trainer = C.Trainer(None, (z), [C.sgd(z.parameters, lr_per_sample)])

    for i in range(100):
        input_data = np.random.rand(dim)
        trainer.train_minibatch({in1: [input_data]})
def test_model_not_criterion_subset():
    input_dim = 2
    proj_dim = 11
    model1_dim = 3
    model2_dim = 4
    x = sequence.input_variable((input_dim,))

    core = C.layers.Embedding(proj_dim)
    model1 = C.layers.Dense(model1_dim)(sequence.last(core(x)))
    model1_label = C.input_variable((model1_dim,))
    ce_model1 = cross_entropy_with_softmax(model1, model1_label)
    pe_model1 = classification_error(model1, model1_label)

    model2 = C.layers.Dense(model2_dim)(core(x))
    model2_label = sequence.input_variable((model2_dim,))
    ce_model2 = cross_entropy_with_softmax(model2, model2_label)
    pe_model2 = classification_error(model2, model2_label)

    ce = 0.5 * sequence.reduce_sum(ce_model2) + 0.5 * ce_model1

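    # the criterion combines both model branches, so it references parameters outside the trainer's model (model1)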
    lr_schedule = C.learning_parameter_schedule(0.003, minibatch_size=1)
    trainer_multitask = C.Trainer(model1, (ce, pe_model1), C.sgd(ce.parameters, lr=lr_schedule))

    x_data = np.asarray([[2., 1.], [1., 2.]], np.float32)
    model1_label_data = np.asarray([1., 0., 0.], np.float32)
    model2_label_data = np.asarray([[0., 1., 0., 0.], [0., 0., 0., 1.]], np.float32)
    trainer_multitask.train_minibatch({x : [x_data], model1_label : [model1_label_data], model2_label : [model2_label_data]})
def test_trainer_with_some_params_not_learned():
    input_dim = 2
    proj_dim = 2
    x = C.input_variable(shape=(input_dim,))
    W = parameter(shape=(input_dim, proj_dim), init=C.glorot_uniform())
    B = parameter(shape=(proj_dim,), init=C.glorot_uniform())
    t = times(x, W)
    z = t + B

    W_orig_value = W.value
    B_orig_value = B.value

    labels = C.input_variable(shape=(proj_dim,))
    ce = cross_entropy_with_softmax(z, labels)
    pe = classification_error(z, labels)

    lr_per_sample = C.learning_parameter_schedule(0.1, minibatch_size=1)
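    # only W is handed to the learner below, so B should keep its initial value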
    trainer = C.Trainer(z, (ce, pe), C.sgd([W], lr_per_sample))

    x_value = [[1, 1],[2, 2]]
    label_value = [[0, 1], [1, 0]]
    arguments = {x: x_value, labels: label_value}

    num_iters = 3
    for i in range(num_iters):
        trainer.train_minibatch(arguments)

        assert np.array_equal(B.value, B_orig_value)
        assert not np.array_equal(W.value, W_orig_value)
        W_orig_value = W.value

    trainer.test_minibatch(arguments)
def test_empty_minibatch():
    scalar = C.input_variable((1,), dtype=np.float32, name='tscalar')
    op = scalar + parameter(init=np.asarray([1]), dtype=np.float32)

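    # training on an empty minibatch should be handled gracefully (no update, no error)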
    lr_per_sample = C.learning_parameter_schedule(0.1, minibatch_size=1)
    trainer = C.Trainer(op, (op, None), C.sgd(op.parameters, lr_per_sample))
    trainer.train_minibatch({})
def create_trainer(network, epoch_size, num_quantization_bits,
                   progress_printer):
    # Set learning parameters
    lr_per_mb = [0.01] * 20 + [0.001] * 20 + [0.0001] * 20 + [0.00001] * 10 + [
        0.000001
    ]
    lr_schedule = C.learning_parameter_schedule(lr_per_mb,
                                                epoch_size=epoch_size)
    mm_schedule = C.learners.momentum_schedule(0.9)
    l2_reg_weight = 0.0005  # CNTK L2 regularization is per sample, thus same as Caffe

    # Create learner
    local_learner = C.learners.momentum_sgd(
        network['output'].parameters,
        lr_schedule,
        mm_schedule,
        unit_gain=False,
        l2_regularization_weight=l2_reg_weight)
    # Since we reuse parameter settings (learning rate, momentum) from Caffe, we set unit_gain to False to ensure consistency
    parameter_learner = data_parallel_distributed_learner(
        local_learner,
        num_quantization_bits=num_quantization_bits,
        distributed_after=0)

    # Create trainer
    return C.Trainer(network['output'], (network['ce'], network['pe']),
                     parameter_learner, progress_printer)
def train(reader, model, loss_function, error_function, input_map,
          num_sweeps_to_train_with=10, num_samples_per_sweep=6000,
          minibatch_size=64, learning_rate=0.2):
    # Instantiate the trainer object to drive the model training    
    lr_schedule = C.learning_parameter_schedule(learning_rate)
    learner = C.sgd(model.parameters, lr_schedule)

    # Print progress
    progress_printer_stdout = ProgressPrinter(freq=minibatch_size)

    # Instantiate trainer
    trainer = C.Trainer(model, (loss_function, error_function), [learner], progress_writers=progress_printer_stdout)

    # Start a timer
    start = time.time()
    aggregate_metric = 0
    total_samples = 0
    num_minibatches_to_train = (num_samples_per_sweep * num_sweeps_to_train_with) / minibatch_size

    for i in range(0, int(num_minibatches_to_train)):
        # Read a mini batch from the training data file
        data = reader.next_minibatch(minibatch_size, input_map=input_map)
        trainer.train_minibatch(data)
        samples = trainer.previous_minibatch_sample_count
        aggregate_metric += trainer.previous_minibatch_evaluation_average * samples
        total_samples += samples       

    # Print training time
    print("Training took {:.1f} sec".format(time.time() - start))
    print("Average error: {0:.2f}%".format((aggregate_metric * 100.0) / (total_samples)))

    return trainer
Example #8
    def create_trainer(self):
        try:
            p = self.output.parameters
            # Three of four parameters are learned by block_momentum_distributed_learner.
            bmd_learner = cntk.block_momentum_distributed_learner(
                cntk.momentum_sgd(
                    [p[0], p[1], p[2]],
                    cntk.learning_parameter_schedule(0.0001),
                    cntk.momentum_as_time_constant_schedule(1000)),
                block_size=1000,
                block_learning_rate=0.01,
                block_momentum_as_time_constant=1000)

            # New API to mark which learner is to be used for metric aggregation.
            bmd_learner.set_as_metric_aggregator()

            # The last parameter is learned by the data_parallel_distributed_learner.
            momentum_schedule = cntk.momentum_schedule_per_sample(
                0.9990913221888589)
            lr_per_sample = cntk.learning_parameter_schedule_per_sample(0.007)
            dpd_learner = cntk.data_parallel_distributed_learner(
                cntk.momentum_sgd([p[3]], lr_per_sample, momentum_schedule,
                                  True))

            comm_rank = cntk.distributed.Communicator.rank()
            self.trainer = cntk.Trainer(
                self.output, (self.ce, self.err), [bmd_learner, dpd_learner], [
                    cntk.logging.ProgressPrinter(
                        freq=progress_freq, tag="Training", rank=comm_rank)
                ])
        except RuntimeError:
            self.trainer = None
        return
Example #9
def test_ext_backpropstate(payload):
    class TestBackPropState(UserFunction):
        def __init__(self, arg, payload, name='f1'):
            self.payload = payload
            super(TestBackPropState, self).__init__([arg])

        def infer_outputs(self):
            return [
                C.output_variable(self.inputs[0].shape, self.inputs[0].dtype,
                                  self.inputs[0].dynamic_axes)
            ]

        def forward(self, argument, device=None, outputs_to_retain=None):
            return self.payload, argument

        def backward(self, state, root_gradients):
            assert state == self.payload
            return root_gradients

    dim = 4

    p = C.parameter(shape=(dim, ), init=10)
    in1 = C.input_variable(dim, needs_gradient=True, name='i_var')
    m = C.user_function(TestBackPropState(in1, payload))
    z = m + p

    lr_per_sample = C.learning_parameter_schedule(0.007, minibatch_size=1)
    trainer = C.Trainer(None, (z), [C.sgd(z.parameters, lr_per_sample)])

    for i in range(100):
        input_data = np.random.rand(dim)
        trainer.train_minibatch({in1: [input_data]})
def test_trainer(tmpdir, no_eval_function):
    in1 = C.input_variable(shape=(1,))
    labels = C.input_variable(shape=(1,))
    p = parameter(shape=(2,), init=10)
    z = plus(in1, reduce_sum(p), name='z')
    ce = cross_entropy_with_softmax(z, labels)
    if no_eval_function:
        errs = None
    else:
        errs = classification_error(z, labels)

    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = C.learning_parameter_schedule(0.007, minibatch_size=1)
    trainer = C.Trainer(z, (ce, errs),
            [C.momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant, True)])
    in1_value = [[1],[2]]
    label_value = [[0], [1]]
    arguments = {in1: in1_value, labels: label_value}
    z_output = z.output
    updated, var_map = trainer.train_minibatch(arguments, outputs=[z_output])

    p = str(tmpdir / 'checkpoint.dat')
    external_state = {"additional external state":math.pi, "nested dict":{"a":"b"}, "list":[1,2,3]}
    trainer.save_checkpoint(p, external_state)
    restored_state = trainer.restore_from_checkpoint(p)

    assert external_state == restored_state

    assert trainer.model.name == 'z'

    # Ensure that Swig is not leaking raw types
    assert isinstance(trainer.model, Function)
    assert trainer.model.__doc__
    assert isinstance(trainer.parameter_learners[0], C.Learner)
Example #11
def main(params):
    # Create output and log directories if they don't exist
    if not os.path.isdir(params['output_folder']):
        os.makedirs(params['output_folder'])

    if not os.path.isdir(params['log_folder']):
        os.makedirs(params['log_folder'])

    # Create the network
    network = create_network()

    # Create readers
    train_reader = cbf_reader(os.path.join(params['input_folder'], 'train{}.cbf'.format(params['prefix'])), is_training=True,
                              max_samples=cntk.io.INFINITELY_REPEAT)
    cv_reader = cbf_reader(os.path.join(params['input_folder'], 'test{}.cbf'.format(params['prefix'])), is_training=False,
                           max_samples=cntk.io.FULL_DATA_SWEEP)
    test_reader = cbf_reader(os.path.join(params['input_folder'], 'test{}.cbf'.format(params['prefix'])), is_training=False,
                             max_samples=cntk.io.FULL_DATA_SWEEP)

    input_map = {
        network['input']: train_reader.streams.front,
        network['target']: train_reader.streams.label
    }

    # Create learner
    mm_schedule = momentum_schedule(0.90)
    lr_schedule = learning_parameter_schedule([(40, 0.1), (40, 0.01)], minibatch_size=params['minibatch_size'])
    learner = cntk.adam(network['model'].parameters, lr_schedule, mm_schedule, l2_regularization_weight=0.0005,
                        epoch_size=params['epoch_size'], minibatch_size=params['minibatch_size'])

    # Use TensorBoard for visual logging
    log_file = os.path.join(params['log_folder'], 'log.txt')
    pp_writer = cntk.logging.ProgressPrinter(freq=10, tag='Training', num_epochs=params['max_epochs'], log_to_file=log_file)
    tb_writer = cntk.logging.TensorBoardProgressWriter(freq=10, log_dir=params['log_folder'], model=network['model'])

    # Create trainer and training session
    trainer = Trainer(network['model'], (network['loss'], network['metric']), [learner], [pp_writer, tb_writer])
    test_config = TestConfig(minibatch_source=test_reader, minibatch_size=params['minibatch_size'], model_inputs_to_streams=input_map)
    cv_config = CrossValidationConfig(minibatch_source=cv_reader, frequency=(1, DataUnit.sweep),
                                      minibatch_size=params['minibatch_size'], model_inputs_to_streams=input_map)
    checkpoint_config = CheckpointConfig(os.path.join(params['output_folder'], model_name), frequency=(10, DataUnit.sweep), restore=params['restore'])

    session = training_session(trainer=trainer,
                               mb_source=train_reader,
                               mb_size=params['minibatch_size'],
                               model_inputs_to_streams=input_map,
                               max_samples=params['epoch_size'] * params['max_epochs'],
                               progress_frequency=(1, DataUnit.sweep),
                               checkpoint_config=checkpoint_config,
                               cv_config=cv_config,
                               test_config=test_config)

    cntk.logging.log_number_of_parameters(network['model'])
    session.train()

    # Save the trained model
    path = os.path.join(params['output_folder'], 'final_model.dnn')
    network['model'].save(path)
    print('Saved final model to', path)
Example #12
def train(reader, model_func, max_epochs=10, task='slot_tagging'):
    
    # Create the containers for input feature (x) and the label (y)
    x = C.sequence.input_variable(vocab_size)
    y = C.sequence.input_variable(num_labels)
    # Instantiate the model function; x is the input (feature) variable 
    model = model_func(x)
    # Instantiate the loss and error function
    loss, label_error = create_criterion_function_preferred(model, y)

    # training config
    epoch_size = 18000        # 18000 samples is half the dataset size 
    minibatch_size = 70
    
    # LR schedule over epochs 
    # In CNTK, an epoch defines how often we step out of the minibatch loop to
    # do other work (e.g. checkpointing, adjusting the learning rate, etc.)
    lr_per_sample = [3e-4]*4+[1.5e-4]
    lr_per_minibatch = [lr * minibatch_size for lr in lr_per_sample]
    lr_schedule = C.learning_parameter_schedule(lr_per_minibatch, epoch_size=epoch_size)
    
    # Momentum schedule
    momentums = C.momentum_schedule(0.9048374180359595, minibatch_size=minibatch_size)
    
    # We use the Adam optimizer, which is known to work well on this dataset
    # Feel free to try other optimizers from 
    # https://www.cntk.ai/pythondocs/cntk.learner.html#module-cntk.learner
    learner = C.adam(parameters=model.parameters,
                     lr=lr_schedule,
                     momentum=momentums,
                     gradient_clipping_threshold_per_sample=15, 
                     gradient_clipping_with_truncation=True)

    # Setup the progress updater
    progress_printer = C.logging.ProgressPrinter(tag='Training', num_epochs=max_epochs)
    
    # Uncomment below for more detailed logging
    #progress_printer = ProgressPrinter(freq=100, first=10, tag='Training', num_epochs=max_epochs) 

    # Instantiate the trainer
    trainer = C.Trainer(model, (loss, label_error), learner, progress_printer)

    # process minibatches and perform model training
    C.logging.log_number_of_parameters(model)
    
    # Assign the data fields to be read from the input
    if task == 'slot_tagging':
        data_map={x: reader.streams.query, y: reader.streams.slot_labels}
    else:
        data_map={x: reader.streams.query, y: reader.streams.intent} 
    t = 0
    for epoch in range(max_epochs):         # loop over epochs
        epoch_end = (epoch+1) * epoch_size
        while t < epoch_end:                # loop over minibatches in the epoch
            data = reader.next_minibatch(minibatch_size, input_map=data_map)  # fetch minibatch
            trainer.train_minibatch(data)               # update model with it
            t += data[y].num_samples                    # samples so far
        trainer.summarize_training_progress()
Example #13
def main():
    show_image = False
    sigma_r = 8
    grid_sz = 64
    if show_image:
        sz = 256
        n_chans = 3
        bs = 1
        data = skio.imread("/data/rgb.png").mean(2)[:sz, :sz].astype(
            np.float32)
        data = np.expand_dims(data / 255.0, 0)
        n_epochs = 1000
        lr = 0.001
    else:
        sz = 1024
        n_chans = 3
        bs = 4
        N = 4
        data = np.random.uniform(size=[N, sz, sz]).astype(np.float32)
        n_epochs = 50
        lr = 0.000000001
    imdata = np.tile(np.expand_dims(data, 1), [1, n_chans, 1, 1])

    im = C.input_variable([n_chans, sz, sz], needs_gradient=True)
    guide = C.input_variable([sz, sz], needs_gradient=True)
    guide_no_grad = C.input_variable([sz, sz], needs_gradient=False)
    model = BilateralSlice(sz,
                           n_chans,
                           n_chans,
                           sigma_r=sigma_r,
                           grid_sz=grid_sz)
    out = model(im, guide, guide_no_grad)

    svg = C.logging.graph.plot(out, "/output/graph.svg")

    if show_image:
        # --- Show output -----------------------------------------------------------
        inputs = {im: imdata[0], guide: data[0], guide_no_grad: data[0]}
        out_ = out.eval(inputs)
        out_ = np.clip(np.transpose(np.squeeze(out_), [1, 2, 0]), 0, 1)
        skio.imsave("/output/imout.png", out_)
    else:
        # --- Train -----------------------------------------------------------------
        loss = C.squared_error(out, im)
        C.debugging.profiler.start_profiler("/output/pyprof")
        C.debugging.profiler.enable_profiler()
        learner = C.sgd(model.parameters, C.learning_parameter_schedule(lr))
        progress_writer = C.logging.ProgressPrinter(0)
        begin = time.time()
        summary = loss.train((imdata, data, data),
                             parameter_learners=[learner],
                             callbacks=[progress_writer],
                             max_epochs=n_epochs,
                             minibatch_size=bs)
        end = time.time()
        runtime = (end - begin) * 1000.0 / n_epochs
        print('Runtime:', runtime)
        C.debugging.profiler.stop_profiler()
def test_sgd_with_noise():
    # Runs a network where the number of parameters is odd
    # in some layers. This tests that cuRand library will not
    # complain about generating an odd number of random values
    np.random.seed(98052)
    learner = lambda params: sgd(params, lr=C.learning_parameter_schedule(0.125), gaussian_noise_injection_std_dev=0.01)
    ffnet(learner)
    # We just verify that we did not crash
    assert(True)
Example #15
def train(nonlinearity,
          num_hidden_layers,
          device_id,
          minibatch_size=10,
          num_samples=1000):
    from cntk.cntk_py import always_allow_setting_default_device
    always_allow_setting_default_device()
    C.try_set_default_device(cntk_device(device_id))
    np.random.seed(0)

    learning_rate = 0.5
    lr_schedule = C.learning_parameter_schedule(learning_rate)

    hidden_layers_dim = 50

    inp = C.input_variable((input_dim), np.float32)
    label = C.input_variable((num_output_classes), np.float32)

    z = fully_connected_classifier_net(inp, num_output_classes,
                                       hidden_layers_dim, num_hidden_layers,
                                       nonlinearity)

    loss = C.cross_entropy_with_softmax(z, label)
    eval_error = C.classification_error(z, label)

    learner = C.sgd(z.parameters, lr_schedule, minibatch_size=0)
    trainer = C.Trainer(z, (loss, eval_error), [learner])

    num_minibatches_to_train = int(num_samples / minibatch_size)

    training_progress_output_freq = 20

    losses = []
    errors = []

    for i in range(num_minibatches_to_train):
        features, labels = generate_random_data_sample(minibatch_size,
                                                       input_dim,
                                                       num_output_classes)

        # Specify the input variables mapping in the model to actual minibatch
        # data for training.
        trainer.train_minibatch({
            inp: features,
            label: labels
        },
                                device=cntk_device(device_id))

        batchsize, loss, error = print_training_progress(
            trainer, i, training_progress_output_freq)

        if not (loss == "NA" or error == "NA"):
            losses.append(loss)
            errors.append(error)

    return losses, errors
def test_universal():
    np.random.seed(98052)
    builtin_sgd = lambda params: sgd(params, lr=C.learning_parameter_schedule(0.125))
    builtin_last_avg_error, builtin_avg_error, _ = ffnet(builtin_sgd)
    np.random.seed(98052)
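    # my_sgd takes a fixed per-sample step of 0.125/25, presumably matching ffnet's minibatch size so it mirrors builtin sgd(lr=0.125)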
    my_sgd = lambda ps, gs: C.combine([C.assign(p, p - 0.125/25 * g) for p, g in zip(ps, gs)])
    universal_sgd = lambda params: universal(my_sgd, params)
    my_last_avg_error, my_avg_error, _ = ffnet(universal_sgd)
    assert np.all(np.less_equal(my_last_avg_error, builtin_last_avg_error))
    assert np.all(np.less_equal(my_avg_error, builtin_avg_error))
Example #17
def train():
    model = Model()
    z, loss, acc = model.model()

    progress_writers = [
        C.logging.ProgressPrinter(num_epochs=max_epochs,
                                  freq=log_freq,
                                  tag='Training',
                                  log_to_file='log/log_' + version)
    ]

    lr = C.learning_parameter_schedule(learning_rate,
                                       minibatch_size=None,
                                       epoch_size=None)
    learner = C.adadelta(z.parameters, lr)
    trainer = C.Trainer(z, (loss, acc), learner, progress_writers)

    mb_source, input_map = deserialize(loss, train_data, model)
    mb_valid, valid_map = deserialize(loss, valid_data, model)

    try:
        trainer.restore_from_checkpoint('../model/' + version)
    except Exception:
        print('No checkpoint.')

    for epoch in range(max_epochs):
        # train
        num_seq = 0
        with tqdm(total=epoch_size, ncols=79) as progress_bar:
            while True:
                data = mb_source.next_minibatch(minibatch_size,
                                                input_map=input_map)
                trainer.train_minibatch(data)
                num_seq += trainer.previous_minibatch_sample_count
                progress_bar.update(trainer.previous_minibatch_sample_count)
                if num_seq >= epoch_size:
                    break
            trainer.summarize_training_progress()
            trainer.save_checkpoint('../model/' + version + '/' + str(epoch))

        # validation
        num_seq = 0
        with tqdm(total=num_validation, ncols=79) as valid_progress_bar:
            while True:
                data = mb_valid.next_minibatch(minibatch_size,
                                               input_map=valid_map)
                if not data:
                    break
                trainer.test_minibatch(data)
                num_seq += len(data)
                valid_progress_bar.update(len(data))
                if num_seq >= num_validation:
                    break
            trainer.summarize_test_progress()
Example #18
def create_learner(model):
    '''Create the optimized method'''
    lr_per_minibatch = C.learning_parameter_schedule(opt.lr)
    momentum_schedule = C.momentum_schedule_per_sample(0.9990913221888589)
    if opt.optim == 'sgd':
        return C.sgd(model.parameters, lr=lr_per_minibatch)
    elif opt.optim == 'adam':
        return C.adam(model.parameters, lr=lr_per_minibatch, momentum=momentum_schedule)
    elif opt.optim == 'adagrad':
        return C.adagrad(model.parameters, lr=lr_per_minibatch)
    else:
        raise RuntimeError("Invalid optim method: " + opt.optim)
def test_scalar_loss_function():
    import cntk as C

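    # reducing over all axes makes the loss a scalar; the minibatch below carries two samples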
    x = C.input_variable((1,))
    l = C.input_variable((2,))
    proj = C.layers.Dense(2)(x)
    loss = C.reduce_sum(C.cross_entropy_with_softmax(proj, l), axis=C.Axis.all_axes()) * 1.0
    lr_per_sample = C.learning_parameter_schedule(0.1, minibatch_size=1)
    trainer = C.Trainer(None, (loss, None), C.sgd(loss.parameters, lr_per_sample))
    result = trainer.train_minibatch({x : np.asarray([[.1], [-.1]], dtype=np.float32), l : np.asarray([[0, 1], [1, 0]], dtype=np.float32)})
    assert result
    assert trainer.total_number_of_samples_seen == 2
Example #20
    def train(self):
        tmp_d = {"x": [], "y": []}
        num_list = []
        count = 0
        for idx, value in enumerate(self.series):
            if idx % self.h_dims == 0:
                num_list = []
                count += 1
                if (self.h_dims * count) > len(self.series):
                    break
            num_list.append(np.float32(value))
            increment_list = []
            for num in num_list:
                increment_list.append(num)
                tmp_d["x"].append(np.array(increment_list))
                tmp_d["y"].append(
                    np.array([np.float32(self.series[self.h_dims * count])]))

        x = {"train": tmp_d["x"]}
        y = {"train": np.array(tmp_d["y"])}

        z = self.create_model(self.input_node, self.h_dims)
        var_l = cntk.input_variable(1, dynamic_axes=z.dynamic_axes, name="y")
        learning_rate = 0.005
        lr_schedule = cntk.learning_parameter_schedule(learning_rate)
        loss = cntk.squared_error(z, var_l)
        error = cntk.squared_error(z, var_l)
        momentum_schedule = cntk.momentum_schedule(
            0.9, minibatch_size=self.batch_size)
        learner = cntk.fsadagrad(z.parameters,
                                 lr=lr_schedule,
                                 momentum=momentum_schedule)
        trainer = cntk.Trainer(z, (loss, error), [learner])

        # training
        loss_summary = []

        start = time.time()
        for epoch in range(0, self.epochs):
            for x_batch, l_batch in self.next_batch(x, y, "train",
                                                    self.batch_size):
                trainer.train_minibatch({
                    self.input_node: x_batch,
                    var_l: l_batch
                })
            if epoch % (self.epochs / 10) == 0:
                training_loss = trainer.previous_minibatch_loss_average
                loss_summary.append(training_loss)
                print("epoch: {}, loss: {:.4f} [time: {:.1f}s]".format(
                    epoch, training_loss,
                    time.time() - start))
        return z
Example #21
def test_clone_freeze():
    inputs = 3
    outputs = 5

    features = C.input_variable((inputs), np.float32)
    label = C.input_variable((outputs), np.float32)
    weights = C.parameter((inputs, outputs))
    const_weights = C.constant(weights.value)
    z = C.times(features, weights)
    c = C.times(features, const_weights)
    z_clone = z.clone('freeze')
    c_clone = c.clone('freeze')

    # check that z and z_clone are the same
    for p, q in zip(z.parameters, z_clone.constants):
        assert np.array_equal(p.value, q.value)

    # check that c and c_clone are the same
    for p, q in zip(c.constants, c_clone.constants):
        assert np.array_equal(p.value, q.value)

    # keep copies of the old values
    z_copies = [q.value for q in z_clone.constants]
    c_copies = [q.value for q in c_clone.constants]

    # update z
    trainer = C.Trainer(
        z, C.squared_error(z, label),
        C.sgd(z.parameters, C.learning_parameter_schedule(1.0)))
    x = np.random.randn(16, 3).astype('f')
    y = np.random.randn(16, 5).astype('f')
    trainer.train_minibatch({features: x, label: y})
    # update c
    for cc in c.constants:
        cc.value = np.random.randn(*cc.value.shape).astype('f')

    # check that z changed
    for p, q in zip(z.parameters, z_clone.constants):
        assert not np.array_equal(p.value, q.value)

    # check that z_clone did not change
    for p, q in zip(z_copies, z_clone.constants):
        assert np.array_equal(p, q.value)

    # check that c changed
    for p, q in zip(c.constants, c_clone.constants):
        assert not np.array_equal(p.value, q.value)

    # check that c_clone did not change
    for p, q in zip(c_copies, c_clone.constants):
        assert np.array_equal(p, q.value)
def create_learner(model):
    '''Create the optimized method'''
    lr_per_minibatch = C.learning_parameter_schedule(opt.lr)
    momentum_schedule = C.momentum_schedule_per_sample(0.9990913221888589)
    if opt.optim == 'sgd':
        return C.sgd(model.parameters, lr=lr_per_minibatch)
    elif opt.optim == 'adam':
        return C.adam(model.parameters,
                      lr=lr_per_minibatch,
                      momentum=momentum_schedule)
    elif opt.optim == 'adagrad':
        return C.adagrad(model.parameters, lr=lr_per_minibatch)
    else:
        raise RuntimeError("Invalid optim method: " + opt.optim)
Example #23
def lstm_basic(x, y, epochs=1000, batch_size=100, input_dim=5):

    x_axes = [C.Axis.default_batch_axis(), C.Axis.default_dynamic_axis()]
    C.input_variable(1, dynamic_axes=x_axes)

    # input sequences
    input_seq = C.sequence.input_variable(1)

    # create the model
    z = create_model(input_seq, input_dim)

    # expected output (label), also the dynamic axes of the model output
    # is specified as the model of the label input
    lb = C.input_variable(1, dynamic_axes=z.dynamic_axes, name="y")

    # the learning rate
    learning_rate = 0.02
    lr_schedule = C.learning_parameter_schedule(learning_rate)

    # loss function
    loss = C.squared_error(z, lb)

    # use squared error to determine error for now
    error = C.squared_error(z, lb)

    # use fsadagrad optimizer
    momentum_schedule = C.momentum_schedule(0.9, minibatch_size=batch_size)
    learner = C.fsadagrad(z.parameters,
                          lr=lr_schedule,
                          momentum=momentum_schedule,
                          unit_gain=True)

    trainer = C.Trainer(z, (loss, error), [learner])

    # train
    loss_summary = []
    start = time.time()
    for epoch in range(0, epochs):
        for x1, y1 in next_batch(x, y, "train", batch_size):
            trainer.train_minibatch({input_seq: x1, lb: y1})
        if epoch % (epochs / 10) == 0:
            training_loss = trainer.previous_minibatch_loss_average
            loss_summary.append(training_loss)
            print("epoch: {}, loss: {:.4f} [time: {:.1f}s]".format(
                epoch, training_loss,
                time.time() - start))
    print("training took {0:.1f} sec".format(time.time() - start))

    return z, trainer, input_seq
def train(nonlinearity, num_hidden_layers, device_id,
          minibatch_size=10, num_samples=1000):
    from cntk.cntk_py import always_allow_setting_default_device
    always_allow_setting_default_device()
    C.try_set_default_device(cntk_device(device_id))
    np.random.seed(0)

    learning_rate = 0.5
    lr_schedule = C.learning_parameter_schedule(learning_rate)

    hidden_layers_dim = 50

    inp = C.input_variable((input_dim), np.float32)
    label = C.input_variable((num_output_classes), np.float32)

    z = fully_connected_classifier_net(inp, num_output_classes, hidden_layers_dim,
                                       num_hidden_layers, nonlinearity)

    loss = C.cross_entropy_with_softmax(z, label)
    eval_error = C.classification_error(z, label)

    learner = C.sgd(z.parameters, lr_schedule, minibatch_size=0)
    trainer = C.Trainer(z, (loss, eval_error), [learner])

    num_minibatches_to_train = int(num_samples / minibatch_size)

    training_progress_output_freq = 20

    losses = []
    errors = []

    for i in range(num_minibatches_to_train):
        features, labels = generate_random_data_sample(minibatch_size,
                                                       input_dim,
                                                       num_output_classes)

        # Specify the input variables mapping in the model to actual minibatch
        # data for training.
        trainer.train_minibatch({inp: features, label: labels},
                                device=cntk_device(device_id))

        batchsize, loss, error = print_training_progress(trainer, i,
                                                         training_progress_output_freq)

        if not (loss == "NA" or error == "NA"):
            losses.append(loss)
            errors.append(error)

    return losses, errors
Example #25
def trainAndTestOneFold(model, modelLabel, features, labels, features_test, labels_test):
    input = model.arguments[0]

    # Training
    loss = C.cross_entropy_with_softmax(model, modelLabel)
    eval_error = C.classification_error(model, modelLabel)

    # Instantiate the trainer object to drive the model training
    lr_schedule = C.learning_parameter_schedule(learning_rate)
    learner = C.sgd(model.parameters, lr_schedule)
    trainer = C.Trainer(model, (loss, eval_error), [learner])

    # Run the trainer and perform model training
    training_progress_output_freq = 20

    plotdata = {"batchsize":[], "loss":[], "error":[]}
    for i in range(0, int(num_minibatches_to_train)):
        # Specify the input variables mapping in the model to actual minibatch data for training
        trainer.train_minibatch({input : features, modelLabel : labels})
        batchsize, loss, error = print_training_progress(trainer, i,
                                                         training_progress_output_freq, verbose=0)

        if not (loss == "NA" or error =="NA"):
            plotdata["batchsize"].append(batchsize)
            plotdata["loss"].append(loss)
            plotdata["error"].append(error)

    # Compute the moving average loss to smooth out the noise in SGD
    plotdata["avgloss"] = moving_average(plotdata["loss"])
    plotdata["avgerror"] = moving_average(plotdata["error"])

    # Graph data
    #showGraphs(plotdata)

    trainer.test_minibatch({input : features_test, modelLabel : labels_test})

    out = C.softmax(model)
    predicted_label_probs = out.eval({input : features_test})

    true_labels = [np.argmax(label) for label in labels_test]
    predicted_labels = [np.argmax(row) for row in predicted_label_probs]
    classificationRate, confusionMatrix = computeMetrics(true_labels, predicted_labels)

    print("Label    :", true_labels)
    print("Predicted:", predicted_labels)
    print("Precision: ", classificationRate)
    print("Confusion Matrix:\n", confusionMatrix)

    return (classificationRate, confusionMatrix)
Example #26
    def create_trainer(self):
        learner = cntk.block_momentum_distributed_learner(
            cntk.momentum_sgd(self.output.parameters,
                              cntk.learning_parameter_schedule(0.0001),
                              cntk.momentum_as_time_constant_schedule(1000)),
            block_size=1000,
            block_learning_rate=0.01,
            block_momentum_as_time_constant=1000)

        comm_rank = cntk.distributed.Communicator.rank()
        self.trainer = cntk.Trainer(
            self.output, (self.ce, self.err), [learner], [
                cntk.logging.ProgressPrinter(
                    freq=progress_freq, tag="Training", rank=comm_rank)
            ])
Example #27
def ffnet():
    inputs = 2
    outputs = 2
    layers = 2
    hidden_dimension = 50

    # input variables denoting the features and label data
    features = C.input_variable((inputs), np.float32)
    label = C.input_variable((outputs), np.float32)

    # Instantiate the feedforward classification model
    my_model = Sequential(
        [Dense(hidden_dimension, activation=C.sigmoid),
         Dense(outputs)])
    z = my_model(features)

    ce = C.cross_entropy_with_softmax(z, label)
    pe = C.classification_error(z, label)

    # Instantiate the trainer object to drive the model training
    lr_per_minibatch = C.learning_parameter_schedule(0.125)
    progress_printer = ProgressPrinter(0)
    trainer = C.Trainer(z, (ce, pe), [sgd(z.parameters, lr=lr_per_minibatch)],
                        [progress_printer])

    # Get minibatches of training data and perform model training
    minibatch_size = 25
    num_minibatches_to_train = 1024

    aggregate_loss = 0.0
    for i in range(num_minibatches_to_train):
        train_features, labels = generate_random_data(minibatch_size, inputs,
                                                      outputs)
        # Specify the mapping of input variables in the model to actual minibatch data to be trained with
        trainer.train_minibatch({features: train_features, label: labels})
        sample_count = trainer.previous_minibatch_sample_count
        aggregate_loss += trainer.previous_minibatch_loss_average * sample_count

    last_avg_error = aggregate_loss / trainer.total_number_of_samples_seen

    test_features, test_labels = generate_random_data(minibatch_size, inputs,
                                                      outputs)
    avg_error = trainer.test_minibatch({
        features: test_features,
        label: test_labels
    })
    print(' error rate on an unseen minibatch: {}'.format(avg_error))
    return last_avg_error, avg_error
Example #28
def test_learner_update():
    i = C.input_variable(shape=(1,), needs_gradient=True, name='a')
    w_init = 1
    w = parameter(shape=(1,), init=w_init)
    res = i * w

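    # with minibatch_size=1 and epoch_size=1 the schedule advances per sample: 0.1 now, 0.2 after 100 samples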
    learner = sgd(res.parameters, lr=C.learning_parameter_schedule([0.1]*50 + [0.2]*50, minibatch_size=1, epoch_size=1))
    assert learner.learning_rate() == 0.1
    x = learner.update({w: np.asarray([[2.]], dtype=np.float32)}, 100)
    assert learner.learning_rate() == 0.2
    assert w.value < w_init

    learner.reset_learning_rate(learning_parameter_schedule([0.3]*50 + [0.4]*50, minibatch_size=1, epoch_size=1))
    assert learner.learning_rate() == 0.3
    x = learner.update({w: np.asarray([[2.]], dtype=np.float32)}, 100)
    assert learner.learning_rate() == 0.4
Example #30
    def train(self, train_file, output_resources_pickle_file,
              network_type='unidirectional',
              num_epochs=1, batch_size=50,
              dropout=0.2, reg_alpha=0.0,
              num_hidden_units=150, num_layers=1):
        
        train_X, train_Y = self.reader.read_and_parse_training_data(train_file, output_resources_pickle_file) 

        print("Data Shape: ")
        print(train_X.shape) # (15380, 613)
        print(train_Y.shape) # (15380, 613, 8)      
        #self.wordvecs.shape (66962, 50)
        
        print("Hyper parameters:")
        print("output_resources_pickle_file = {}".format(output_resources_pickle_file))
        print("network_type = {}".format(network_type))
        print("num_epochs= {}".format(num_epochs ))
        print("batch_size = {}".format(batch_size ))
        print("dropout = ".format(dropout ))
        print("reg_alpha = {}".format(reg_alpha ))
        print("num_hidden_units = {}".format(num_hidden_units))
        print("num_layers = {}".format(num_layers ))

        # Instantiate the model function;
        features = C.sequence.input_variable(self.wordvecs.shape[0])
        labels = C.input_variable(train_Y.shape[2], dynamic_axes=[C.Axis.default_batch_axis()])
        self.model = self.__create_model(features, train_Y.shape[2], num_hidden_units, dropout)

        plot_path = "./lstm_model.png"
        plot(self.model, plot_path)        
        
        # Instantiate the loss and error function
        loss = C.cross_entropy_with_softmax(self.model, labels)
        error = C.classification_error(self.model, labels)

        # LR schedule
        learning_rate = 0.02
        lr_schedule = C.learning_parameter_schedule(learning_rate)
        momentum_schedule = C.momentum_schedule(0.9, minibatch_size=batch_size)
        learner = C.fsadagrad(self.model.parameters, lr=lr_schedule, momentum=momentum_schedule, unit_gain=True)

        # Setup the progress updater
        progress_printer = C.logging.ProgressPrinter(freq=100, first=10, tag='Training', num_epochs=num_epochs)

        # Instantiate the trainer. We have all data in memory. https://github.com/Microsoft/CNTK/blob/master/Manual/Manual_How_to_feed_data.ipynb
        print('Start training')       
        train_summary = loss.train((train_X.astype('float32'), train_Y.astype('float32')), parameter_learners=[learner], callbacks=[progress_printer])
def create_trainer(network, epoch_size, num_quantization_bits, progress_printer):
    # Set learning parameters
    lr_per_mb         = [0.01]*20 + [0.001]*20 + [0.0001]*20 + [0.00001]*10 + [0.000001]
    lr_schedule       = C.learning_parameter_schedule(lr_per_mb, epoch_size=epoch_size)
    mm_schedule       = C.learners.momentum_schedule(0.9)
    l2_reg_weight     = 0.0005 # CNTK L2 regularization is per sample, thus same as Caffe

    # Create learner
    local_learner = C.learners.momentum_sgd(network['output'].parameters, lr_schedule, mm_schedule, unit_gain=False, l2_regularization_weight=l2_reg_weight)
    # Since we reuse parameter settings (learning rate, momentum) from Caffe, we set unit_gain to False to ensure consistency
    parameter_learner = data_parallel_distributed_learner(
        local_learner,
        num_quantization_bits=num_quantization_bits,
        distributed_after=0)

    # Create trainer
    return C.Trainer(network['output'], (network['ce'], network['pe']), parameter_learner, progress_printer)
Example #32
    def _create(self, hidden):
        observation = C.input_variable(STATE_COUNT, name="s")
        q_target = C.input_variable(ACTION_COUNT, name="q")

        model = C.layers.Dense(hidden, activation=C.relu)(observation)
        model = C.layers.Dense(ACTION_COUNT)(model)

        # loss='mse'
        loss = C.reduce_mean(C.square(model - q_target)) #, axis=0)

        # optimizer
        lr = 0.00025
        lr_schedule = C.learning_parameter_schedule(lr)
        learner = C.sgd(model.parameters, lr_schedule, gradient_clipping_threshold_per_sample=10)
        trainer = C.Trainer(model, (loss, None), learner)

        return model, trainer, loss
Example #33
def _train(z, loss, eval_error, f_input, l_input, num_output_classes, steps):
    np.random.seed(0)

    input_dim = 2

    lr_schedule = C.learning_parameter_schedule(0.5)
    # we want the learning rate to behave as in the literature (applied per minibatch), without the per-sample scaling:
    learner = sgd(z.parameters, lr_schedule, minibatch_size=C.learners.IGNORE)
    trainer = Trainer(z, (loss, eval_error), [learner])

    minibatch_size = 10

    for i in range(steps):
        features, labels = _generate_random_data_sample(
            minibatch_size, input_dim, num_output_classes)

        trainer.train_minibatch({f_input: features, l_input: labels})
Example #34
    def batch_step(self, previous_minibatch_loss=None):
        """
        Updates learners with new learning rate after one training iteration is complete.
        Must be called once for every training iteration/update.
        """

        self.last_batch_iteration += 1
        lr = self.get_lr()
        self.current_lr = lr
        
        # loss and learning rate get recorded in pre-training mode
        if self.record_history and previous_minibatch_loss:
            self.loss.append(previous_minibatch_loss)
            self.lrs.append(lr)

        self.parameter_learner.reset_learning_rate(C.learning_parameter_schedule(lr, minibatch_size=self.minibatch_size))
        return None
def test_output_to_retain():
    in1 = C.input_variable(shape=(1,))
    labels = C.input_variable(shape=(1,))
    p = parameter(shape=(2,), init=10)
    z = plus(in1, reduce_sum(p), name='z')
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)
    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = C.learning_parameter_schedule(0.007, minibatch_size=1)
    trainer = C.Trainer(z, (ce, errs),
            [C.momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant, True)])
    in1_value = [[1], [2]]
    label_value = [[0], [1]]
    arguments = {in1: in1_value, labels: label_value}
    z_output = z.output
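    # request z's output so its forward value is returned in var_map alongside the update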
    updated, var_map = trainer.train_minibatch(arguments, outputs=[z_output])
    assert np.allclose(var_map[z_output], np.asarray(in1_value)+20)
def test_model_one_output_of_multi_output_function():
    input_dim = 2
    proj_dim = 11
    x = C.input_variable((input_dim,))

    x_placeholder = C.placeholder()
    w = parameter((input_dim, proj_dim))
    b = parameter((proj_dim,))
    proj = times(x_placeholder, w)
    proj_plus_bias = proj + b
    combined_model = as_block(C.combine([proj, proj_plus_bias]), [(x_placeholder, x)], 'dense_op')

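    # only the first output (proj, without the bias) of the multi-output block feeds the criterion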
    labels = C.input_variable((proj_dim,))
    lr_schedule = C.learning_parameter_schedule(0.003, minibatch_size=1)
    ce = cross_entropy_with_softmax(combined_model.outputs[0], labels)
    pe = classification_error(combined_model.outputs[0], labels)
    trainer_multitask = C.Trainer(combined_model.outputs[0], (ce, pe), C.sgd(ce.parameters, lr=lr_schedule))
Example #37
def test_ext_lambdafunc(tmpdir):
    dim = 4

    class CallbackCounter(object):
        def __init__(self):
            self.count = 0

        def inc(self, arg):
            self.count += 1

    cb = CallbackCounter()

    p = C.parameter(shape=(dim,), init=1)
    i = C.input_variable(dim, needs_gradient=True, name='i_var')
    k = i * p
    m = LambdaFunc(k,
                   when=lambda arg: np.sum(arg) > 1,
                   execute=cb.inc)
    m = C.user_function(m)
    z0 = m + 0

    filepath = str(tmpdir / 'test_ext_lambdafunc.dat')
    z0.save(filepath)

    Function.register_udf_deserialize_callback('conditional_exec_lambda',
                                               lambda x, *unused: LambdaFunc(x, when=lambda arg: np.sum(arg) > 1, execute=cb.inc))

    z = Function.load(filepath)

    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = C.learning_parameter_schedule(0.007, minibatch_size=1)
    trainer = C.Trainer(z, (z + 0, z + 0), [C.momentum_sgd(z.parameters,
                                                           lr_per_sample,
                                                           momentum_time_constant,
                                                           True)])

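    # the callback fires only when the input sums to more than 1: 0.1*4 = 0.4 does not, 0.3*4 = 1.2 does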
    i = 0
    input_data = 0.1 * np.ones(dim)
    trainer.train_minibatch([input_data])
    assert cb.count == 0

    input_data = 0.3 * np.ones(dim)
    trainer.train_minibatch([input_data])
    assert cb.count == 1
Example #38
def test_ext_train(tmpdir):
    dim = 4

    p = C.parameter(shape=(dim,), init=10)
    i = C.sequence.input_variable(dim, needs_gradient=True, name='i_var')
    m = MyPlus(i, C.constant(3), 'my_plus')
    # keeping m unwrapped since we need to access its member variables
    z = C.user_function(m) + p

    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = C.learning_parameter_schedule(0.007, minibatch_size=1)
    trainer = C.Trainer(z, (z + 0, z + 0),
                        [C.momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant,
                                        True, minibatch_size=0)])

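    # run 100 minibatches so the UserFunction's forward/backward counters both reach 100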
    i = 0
    while i < 100:
        i += 1
        input_data = np.random.rand(dim)
        trainer.train_minibatch([input_data])

    assert m.forward_calls == m.backward_calls == 100

    filepath = str(tmpdir / 'test_ext_train.dat')

    z.save(filepath)

    buf = open(filepath, 'rb').read()

    # this is only needed for Python 2.7
    # (which does not distinguish between bytes and strings)
    if isinstance(buf, str):
        buf = bytearray(buf)

    z1 = Function.load(buf)

    m1 = z1.find_by_name('my_plus')
    # m1 is an instance of UserFunction; we cannot directly downcast it to MyPlus,
    # so we use serialize() as a workaround:
    state = m1.serialize()['state']

    assert state['forward_calls'] == state['backward_calls'] == 100
def ffnet():
    inputs = 2
    outputs = 2
    layers = 2
    hidden_dimension = 50

    # input variables denoting the features and label data
    features = C.input_variable((inputs), np.float32)
    label = C.input_variable((outputs), np.float32)

    # Instantiate the feedforward classification model
    my_model = Sequential ([
                    Dense(hidden_dimension, activation=C.sigmoid),
                    Dense(outputs)])
    z = my_model(features)

    ce = C.cross_entropy_with_softmax(z, label)
    pe = C.classification_error(z, label)

    # Instantiate the trainer object to drive the model training
    lr_per_minibatch = C.learning_parameter_schedule(0.125)
    progress_printer = ProgressPrinter(0)
    trainer = C.Trainer(z, (ce, pe), [sgd(z.parameters, lr=lr_per_minibatch)], [progress_printer])

    # Get minibatches of training data and perform model training
    minibatch_size = 25
    num_minibatches_to_train = 1024

    aggregate_loss = 0.0
    for i in range(num_minibatches_to_train):
        train_features, labels = generate_random_data(minibatch_size, inputs, outputs)
        # Specify the mapping of input variables in the model to actual minibatch data to be trained with
        trainer.train_minibatch({features : train_features, label : labels})
        sample_count = trainer.previous_minibatch_sample_count
        aggregate_loss += trainer.previous_minibatch_loss_average * sample_count

    last_avg_error = aggregate_loss / trainer.total_number_of_samples_seen

    test_features, test_labels = generate_random_data(minibatch_size, inputs, outputs)
    avg_error = trainer.test_minibatch({features : test_features, label : test_labels})
    print(' error rate on an unseen minibatch: {}'.format(avg_error))
    return last_avg_error, avg_error
Example #40
def _train(z, loss, eval_error,
           f_input, l_input,
           num_output_classes,
           steps):
    np.random.seed(0)

    input_dim = 2

    lr_schedule = C.learning_parameter_schedule(0.5)
    # we want the learning rate to behave as in the literature (applied per minibatch), without the per-sample scaling:
    learner = sgd(z.parameters, lr_schedule, minibatch_size=C.learners.IGNORE)
    trainer = Trainer(z, (loss, eval_error), [learner])

    minibatch_size = 10

    for i in range(steps):
        features, labels = _generate_random_data_sample(
            minibatch_size, input_dim, num_output_classes)

        trainer.train_minibatch({f_input: features, l_input: labels})
Example #41
def _train_backcompatible_test(z, loss, eval_error,
           f_input, l_input,
           num_output_classes,
           steps):
    np.random.seed(0)

    input_dim = 2

    lr_schedule = learning_parameter_schedule(0.5)

    learner = sgd(z.parameters, lr_schedule)
    trainer = Trainer(z, (loss, eval_error), [learner])

    minibatch_size = 10

    for i in range(steps):
        features, labels = _generate_random_data_sample(
            minibatch_size, input_dim, num_output_classes)

        trainer.train_minibatch({f_input: features, l_input: labels})
Example #42
def test_udf_checkpointing(tmpdir):
    dev, w_value, c1_value, c2_value, op = build_test_function()

    label = C.constant(np.asarray([[1, 2], [3, 4]]).astype(np.float32))

    loss = C.cross_entropy_with_softmax(op, label)
    eval_error = C.classification_error(op, label)

    lr_schedule = C.learning_parameter_schedule(0.5)
    learner = C.sgd(op.parameters, lr_schedule, minibatch_size=0)
    trainer = C.Trainer(op, (loss, eval_error), [learner])

    trainer.train_minibatch({op.arguments[0]: np.random.random((2, 2)).astype(np.float32)}, device=dev)

    filepath = str(tmpdir / 'test_checkpointing.out')

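    # the checkpoint should remain a loadable CNTK Dictionary even though the graph contains a user-defined function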
    trainer.save_checkpoint(filepath, external_state={'test': 'test'})

    d = C.cntk_py.Dictionary.load(filepath)
    assert len(d.keys()) != 0
Example #43
def run_distributed_training(tmpdir, create_func):

    in1 = sequence.input_variable(shape=1)
    labels = sequence.input_variable(shape=1)
    p = parameter(shape=2, init=10)
    z = plus(in1, reduce_sum(p), name='z')
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)

    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = C.learning_parameter_schedule(0.007, minibatch_size=1)
    dist_learner = create_func(C.momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant, True))

    communicator = dist_learner.communicator()
    workers = communicator.workers()
    current_worker = communicator.current_worker()
    found_rank = False
    for wk in workers:
        if current_worker.global_rank == wk.global_rank:
            found_rank = True

    assert found_rank

    trainer = C.Trainer(z, (ce, errs), [ dist_learner ])
    in1_value = [[1],[2]]
    label_value = [[0], [1]]
    arguments = {in1: in1_value, labels: label_value}
    z_output = z.output
    updated, var_map = trainer.train_minibatch(arguments, outputs=[z_output])
    
    p = str(tmpdir / 'checkpoint.dat')
    trainer.save_checkpoint(p)
    trainer.restore_from_checkpoint(p)

    communicator.barrier()

    assert trainer.model.name == 'z'

    # Ensure that Swig is not leaking raw types
    assert isinstance(trainer.model, Function)
    assert trainer.model.__doc__
def create_trainer(network, epoch_size, num_quantization_bits, printer, block_size, warm_up, minibatch_size):
    # Set learning parameters
    lr_per_mb         = [0.01]*25 + [0.001]*25 + [0.0001]*25 + [0.00001]*25 + [0.000001]
    lr_schedule       = C.learning_parameter_schedule(lr_per_mb, minibatch_size=minibatch_size, epoch_size=epoch_size)
    mm_schedule       = C.learners.momentum_schedule(0.9, minibatch_size=minibatch_size)
    l2_reg_weight     = 0.0005 # CNTK L2 regularization is per sample, thus same as Caffe

    if block_size != None and num_quantization_bits != 32:
        raise RuntimeError("Block momentum cannot be used with quantization, please remove quantized_bits option.")

    # Create learner
    local_learner = C.learners.momentum_sgd(network['output'].parameters, lr_schedule, mm_schedule, minibatch_size=minibatch_size, unit_gain=False, l2_regularization_weight=l2_reg_weight)
    # Since we reuse parameter settings (learning rate, momentum) from Caffe, we set unit_gain to False to ensure consistency

    # Create trainer
    if block_size != None:
        parameter_learner = block_momentum_distributed_learner(local_learner, block_size=block_size)
    else:
        parameter_learner = data_parallel_distributed_learner(local_learner, num_quantization_bits=num_quantization_bits, distributed_after=warm_up)

    return C.Trainer(network['output'], (network['ce'], network['pe']), parameter_learner, printer)
W = cntk.Parameter((input_dim, num_classes), init=cntk.glorot_uniform(), name='W')
b = cntk.Parameter((num_classes,), init=0, name='b')
model = cntk.times(data, W) + b

# Define the CNTK criterion function. A criterion function maps
# (input vectors, labels) to a loss function and an optional additional
# metric. The loss function is used to train the model parameters.
# We use cross entropy as a loss function.
label_one_hot = cntk.input_variable(num_classes, is_sparse=True)
loss   = cntk.cross_entropy_with_softmax(model, label_one_hot) # this applies softmax to model's output under the hood
metric = cntk.classification_error(model, label_one_hot)
criterion = cntk.combine([loss, metric]) # criterion is a tuple-valued function (loss, metric)

# Learner object. The learner implements the update algorithm, in this case plain SGD.
learning_rate = 0.1
learner = cntk.sgd(model.parameters, cntk.learning_parameter_schedule(learning_rate))

# Trainer.
minibatch_size = 32
progress_writer = cntk.logging.ProgressPrinter(50) # helper for logging progress; log every 50 minibatches
trainer = cntk.Trainer(None, criterion, [learner], [progress_writer])

# Train!
for i in range(0, len(X_train), minibatch_size): # loop over minibatches
    x = X_train[i:i+minibatch_size] # get one minibatch worth of data
    y = Y_train[i:i+minibatch_size]
    trainer.train_minibatch({data: x, label_one_hot: y})  # update model from one minibatch
trainer.summarize_training_progress()

# Test error rate on the test set.
evaluator = cntk.Evaluator(metric, [progress_writer])
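
# The snippet stops right after constructing the evaluator; a minimal sketch of the test
# loop it implies, assuming X_test / Y_test arrays shaped like X_train / Y_train:
for i in range(0, len(X_test), minibatch_size):            # loop over test minibatches
    x = X_test[i:i+minibatch_size]
    y = Y_test[i:i+minibatch_size]
    evaluator.test_minibatch({data: x, label_one_hot: y})  # accumulate the metric
evaluator.summarize_test_progress()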
Example #46
def train(data_path, model_path, log_file, config_file, restore=False, profiling=False, gen_heartbeat=False):
    polymath = PolyMath(config_file)
    z, loss = polymath.model()
    training_config = importlib.import_module(config_file).training_config

    max_epochs = training_config['max_epochs']
    log_freq = training_config['log_freq']

    progress_writers = [C.logging.ProgressPrinter(
                            num_epochs = max_epochs,
                            freq = log_freq,
                            tag = 'Training',
                            log_to_file = log_file,
                            rank = C.Communicator.rank(),
                            gen_heartbeat = gen_heartbeat)]

    lr = C.learning_parameter_schedule(training_config['lr'], minibatch_size=None, epoch_size=None)

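    # Maintain an exponential moving average (EMA) of every model parameter: each constant
    # ema_p is updated in place with C.assign, and all the assigns are bundled into one
    # 'dummy' Function so a single dummy.eval() per minibatch refreshes every average.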
    ema = {}
    dummies = []
    for p in z.parameters:
        ema_p = C.constant(0, shape=p.shape, dtype=p.dtype, name='ema_%s' % p.uid)
        ema[p.uid] = ema_p
        dummies.append(C.reduce_sum(C.assign(ema_p, 0.999 * ema_p + 0.001 * p)))
    dummy = C.combine(dummies)

    learner = C.adadelta(z.parameters, lr)

    if C.Communicator.num_workers() > 1:
        learner = C.data_parallel_distributed_learner(learner)

    tensorboard_writer = TensorBoardProgressWriter(freq=10, log_dir='log', model=z)
    trainer = C.Trainer(z, (loss, None), learner, progress_writers + [tensorboard_writer])

    if profiling:
        C.debugging.start_profiler(sync_gpu=True)

    train_data_file = os.path.join(data_path, training_config['train_data'])
    train_data_ext = os.path.splitext(train_data_file)[-1].lower()

    model_file = os.path.join(model_path, model_name)
    model = C.combine(list(z.outputs) + [loss.output])
    label_ab = argument_by_name(loss, 'ab')

    epoch_stat = {
        'best_val_err' : 100,
        'best_since'   : 0,
        'val_since'    : 0}

    if restore and os.path.isfile(model_file):
        trainer.restore_from_checkpoint(model_file)
        #after restore always re-evaluate
        epoch_stat['best_val_err'] = validate_model(os.path.join(data_path, training_config['val_data']), model, polymath)

    def post_epoch_work(epoch_stat):
        trainer.summarize_training_progress()
        epoch_stat['val_since'] += 1

        if epoch_stat['val_since'] == training_config['val_interval']:
            epoch_stat['val_since'] = 0
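            # Validate with the EMA weights: stash the live parameter values, swap in the
            # averaged ones for validate_model, and put the stashed values back once a new
            # best checkpoint has been saved.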
            temp = dict((p.uid, p.value) for p in z.parameters)
            for p in trainer.model.parameters:
                p.value = ema[p.uid].value
            val_err = validate_model(os.path.join(data_path, training_config['val_data']), model, polymath)
            if epoch_stat['best_val_err'] > val_err:
                epoch_stat['best_val_err'] = val_err
                epoch_stat['best_since'] = 0
                trainer.save_checkpoint(model_file)
                for p in trainer.model.parameters:
                    p.value = temp[p.uid]
            else:
                epoch_stat['best_since'] += 1
                if epoch_stat['best_since'] > training_config['stop_after']:
                    return False

        if profiling:
            C.debugging.enable_profiler()

        return True

    if train_data_ext == '.ctf':
        mb_source, input_map = create_mb_and_map(loss, train_data_file, polymath)

        minibatch_size = training_config['minibatch_size'] # number of samples
        epoch_size = training_config['epoch_size']

        for epoch in range(max_epochs):
            num_seq = 0
            while True:
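                # Warm-up: until 'distributed_after' samples have been seen, read a plain
                # (unpartitioned) minibatch; afterwards read a minibatch scaled by the number
                # of workers and keep only this worker's partition of it.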
                if trainer.total_number_of_samples_seen >= training_config['distributed_after']:
                    data = mb_source.next_minibatch(minibatch_size*C.Communicator.num_workers(), input_map=input_map, num_data_partitions=C.Communicator.num_workers(), partition_index=C.Communicator.rank())
                else:
                    data = mb_source.next_minibatch(minibatch_size, input_map=input_map)

                trainer.train_minibatch(data)
                num_seq += trainer.previous_minibatch_sample_count
                dummy.eval()
                if num_seq >= epoch_size:
                    break
            if not post_epoch_work(epoch_stat):
                break
    else:
        if train_data_ext != '.tsv':
            raise Exception("Unsupported training data format: %s" % train_data_ext)

        minibatch_seqs = training_config['minibatch_seqs'] # number of sequences

        for epoch in range(max_epochs):       # loop over epochs
            tsv_reader = create_tsv_reader(loss, train_data_file, polymath, minibatch_seqs, C.Communicator.num_workers())
            minibatch_count = 0
            for data in tsv_reader:
                if (minibatch_count % C.Communicator.num_workers()) == C.Communicator.rank():
                    trainer.train_minibatch(data) # update model with it
                    dummy.eval()
                minibatch_count += 1
            if not post_epoch_work(epoch_stat):
                break

    if profiling:
        C.debugging.stop_profiler()
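
# A hypothetical entry point for the training routine above; the paths and the config
# module name are placeholders, and Communicator.finalize() is the customary last call
# before a (possibly distributed) CNTK process exits.
if __name__ == '__main__':
    train(data_path='data', model_path='model', log_file='train.log',
          config_file='config', restore=False, profiling=False, gen_heartbeat=False)
    C.Communicator.finalize()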
def mem_leak_check(nonlinearity, num_hidden_layers, device_id,
                   minibatch_size=1, num_samples=10000):
    from cntk.cntk_py import always_allow_setting_default_device
    always_allow_setting_default_device()
    C.try_set_default_device(cntk_device(device_id))
    np.random.seed(0)

    learning_rate = 0.5
    lr_schedule = C.learning_parameter_schedule(learning_rate)

    hidden_layers_dim = 50

    inp = C.input_variable((input_dim), np.float32)
    label = C.input_variable((num_output_classes), np.float32)

    z = fully_connected_classifier_net(inp, num_output_classes, hidden_layers_dim,
                                       num_hidden_layers, nonlinearity)

    loss = C.cross_entropy_with_softmax(z, label)
    eval_error = C.classification_error(z, label)

    learner = C.sgd(z.parameters, lr_schedule, minibatch_size = 0)
    trainer = C.Trainer(z, (loss, eval_error), [learner])

    num_minibatches_to_train = int(num_samples / minibatch_size)

    mem = np.zeros(num_minibatches_to_train)

    features, labels = generate_random_data_sample(minibatch_size,
                                                   input_dim,
                                                   num_output_classes)

    # Set a maximum fraction of iterations, in which the memory is allowed to
    # increase. Most likely these will be the first training runs.
    # Long-term this test needs to be run in a separate process over a longer
    # period of time.
    MEM_INCREASE_FRACTION_TOLERANCE = 0.01
    # Set a maximum allowed memory increase. This tolerance should not be
    # exceeded when run as a standalone process (simply run this file with the
    # Python executable).
    MEM_INCREASE_TOLERANCE = 10*1024

    dev = cntk_device(device_id)
    i = 0
    proc = os_process()
    while i < num_minibatches_to_train:
        mem[i] = mem_used(proc)

        # Specify the input variables mapping in the model to actual minibatch
        # data for training.
        trainer.train_minibatch({inp: features, label: labels},
                                device=dev)
        i += 1

    mem_deltas = np.diff(mem)
    iterations_with_mem_increase = (mem_deltas > 0).sum()
    mem_inc_fraction = iterations_with_mem_increase/num_minibatches_to_train
    # For the rough leak estimation we take the memory footprint after the
    # dust of the first train_minibatch runs has settled.
    mem_diff = mem[-1] - mem[10]

    if mem_inc_fraction > MEM_INCREASE_FRACTION_TOLERANCE and \
            mem_diff > MEM_INCREASE_TOLERANCE:
        mem_changes = mem_deltas[mem_deltas != 0]
        raise ValueError('Potential memory leak of ~ %i KB (%i%% of MBs '
                         'increased memory usage) detected with %s:\n%s' %
                         (int(mem_diff/1024), int(mem_inc_fraction*100),
                             nonlinearity, mem_changes))
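
# os_process() and mem_used() are helpers defined elsewhere in the test module; a rough,
# assumed stand-in built on psutil, measuring the resident set size in bytes (the unit
# the tolerances above appear to use):
import psutil

def os_process():
    return psutil.Process()

def mem_used(proc):
    return proc.memory_info().rss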
def create_trainer(self):
    try:
        learner = cntk.block_momentum_distributed_learner(
            cntk.momentum_sgd(self.output.parameters, cntk.learning_parameter_schedule(0.0001), cntk.momentum_as_time_constant_schedule(1000)),
            block_size=1000, block_learning_rate=0.01, block_momentum_as_time_constant=1000)

        comm_rank = cntk.distributed.Communicator.rank()
        self.trainer = cntk.Trainer(self.output, (self.ce, self.err), [learner], [cntk.logging.ProgressPrinter(freq=progress_freq, tag="Training", rank=comm_rank)])
    except RuntimeError:
        self.trainer = None
    return
Example #49
def one_step_sgd(loss, data, lr=0.1):
    learner = C.sgd(loss.parameters,
                    C.learning_parameter_schedule(lr))
    trainer = C.train.Trainer(loss, (loss, loss), learner, C.logging.ProgressPrinter(freq=0))
    trainer.train_minibatch(data)
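
# A minimal, hypothetical use of one_step_sgd: a two-class softmax regression whose loss
# doubles as both criterion and metric, trained on a single hand-written sample
# (assumes the usual `import cntk as C` / `import numpy as np` from the snippets above).
feat = C.input_variable(2)
lab = C.input_variable(2)
toy_loss = C.cross_entropy_with_softmax(C.layers.Dense(2)(feat), lab)
one_step_sgd(toy_loss,
             {feat: np.asarray([[0.5, 1.5]], dtype=np.float32),
              lab:  np.asarray([[0., 1.]], dtype=np.float32)})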