def test_op_times_reduce_sequence_axis(device_id, precision):
    dt_precision = PRECISION_TO_TYPE[precision]

    from cntk import times, Value, TIMES_REDUCE_SEQUENCE_AXIS_WITHOUT_INFERRED_INPUT_RANK
    from cntk import sequence
    dim = 10
    seq = [[0,1,2], [3], [4,5,6,7,8,9]]
    right_data = Value.one_hot(seq, dim, dtype=dt_precision)
    right_var = sequence.input_variable(shape=(dim), is_sparse=True, dtype=dt_precision)
    left_data = [AA([1,1,1],dtype=dt_precision), AA([1],dtype=dt_precision), AA([1,1,1,1,1,1],dtype=dt_precision)]
    left_var = sequence.input_variable(shape=(1), dtype=dt_precision)

    func = times(left_var, right_var, infer_input_rank_to_map=TIMES_REDUCE_SEQUENCE_AXIS_WITHOUT_INFERRED_INPUT_RANK)
    func2 = sequence.reduce_sum(times(left_var, right_var))

    assert func.dynamic_axes == func2.dynamic_axes

    _, forward_output = func.forward({left_var:left_data, right_var:right_data})
    
    actual_forward = forward_output[func.output]

    expected_forward = AA([[[1,1,1,0,0,0,0,0,0,0]],
                           [[0,0,0,1,0,0,0,0,0,0]],
                           [[0,0,0,0,1,1,1,1,1,1]]])
    
    assert np.allclose(actual_forward, expected_forward)
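
Note: the reduction flavor of times() used above collapses the sequence axis, which is why its dynamic axes match those of sequence.reduce_sum(times(...)). A minimal standalone sketch of that axis reduction, assuming only numpy and the public cntk API:

import numpy as np
import cntk as C

x = C.sequence.input_variable(shape=(3,))
y = C.sequence.reduce_sum(x)  # sums over the sequence axis, dropping it
seq = [np.arange(6, dtype=np.float32).reshape(2, 3)]  # one sequence of length 2
print(y.eval({x: seq}))       # -> [[3. 5. 7.]]
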
def test_model_not_criterion_subset():
    input_dim = 2
    proj_dim = 11
    model1_dim = 3
    model2_dim = 4
    x = sequence.input_variable((input_dim,))

    core = C.layers.Embedding(proj_dim)
    model1 = C.layers.Dense(model1_dim)(sequence.last(core(x)))
    model1_label = C.input_variable((model1_dim,))
    ce_model1 = cross_entropy_with_softmax(model1, model1_label)
    pe_model1 = classification_error(model1, model1_label)

    model2 = C.layers.Dense(model2_dim)(core(x))
    model2_label = sequence.input_variable((model2_dim,))
    ce_model2 = cross_entropy_with_softmax(model2, model2_label)
    pe_model2 = classification_error(model2, model2_label)

    ce = 0.5 * sequence.reduce_sum(ce_model2) + 0.5 * ce_model1

    lr_schedule = C.learning_parameter_schedule(0.003, minibatch_size=1)
    trainer_multitask = C.Trainer(model1, (ce, pe_model1), C.sgd(ce.parameters, lr=lr_schedule))

    x_data = np.asarray([[2., 1.], [1., 2.]], np.float32)
    model1_label_data = np.asarray([1., 0., 0.], np.float32)
    model2_label_data = np.asarray([[0., 1., 0., 0.], [0., 0., 0., 1.]], np.float32)
    trainer_multitask.train_minibatch({x : [x_data], model1_label : [model1_label_data], model2_label : [model2_label_data]})
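
Note: the test above mixes a per-sequence criterion (model2, labeled with sequence.input_variable) and a per-batch criterion (model1, labeled with plain input_variable). A quick sketch of the dynamic-axes difference between the two variable kinds, using only the public cntk API:

import cntk as C

seq_var = C.sequence.input_variable((4,))  # dynamic axes: batch + sequence
vec_var = C.input_variable((4,))           # dynamic axes: batch only
print(len(seq_var.dynamic_axes), len(vec_var.dynamic_axes))  # 2 1
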
def create_sample_model(device, writer=None,
                        lr_per_sample=C.learning_parameter_schedule_per_sample([0.3, 0.2, 0.1, 0.0])):
    in1 = sequence.input_variable(shape=(input_dim,))
    labels = sequence.input_variable(shape=(input_dim,))
    p = parameter(shape=(input_dim,), init=10, device=device)
    z = plus(in1, reduce_sum(p), name='z')
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)

    learner = C.sgd(z.parameters, lr_per_sample)
    trainer = C.Trainer(z, (ce, errs), [learner], writer)
    return (trainer, in1, labels)
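
A hypothetical driver for the factory above. input_dim is assumed to be a module-level constant in the original test file, and the writer parameter is assumed to accept a progress writer such as C.logging.ProgressPrinter:

input_dim = 2  # assumption: defined at module scope in the original file
trainer, in1, labels = create_sample_model(C.device.cpu(),
                                           writer=C.logging.ProgressPrinter(0))
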
Example #5
def create_sample_model(device, writer=None):
    in1 = sequence.input_variable(shape=(input_dim, ))
    labels = sequence.input_variable(shape=(input_dim, ))
    p = parameter(shape=(input_dim, ), init=10, device=device)
    z = plus(in1, reduce_sum(p), name='z')
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)

    lr_per_sample = C.learning_rate_schedule([0.3, 0.2, 0.1, 0.0],
                                             C.UnitType.sample)
    learner = C.sgd(z.parameters, lr_per_sample)
    trainer = C.Trainer(z, (ce, errs), [learner], writer)
    return (trainer, in1, labels)
Example #6
def test_usermbsource_training(tmpdir):
    input_dim = 1000
    num_output_classes = 5

    mbs = MyDataSource(input_dim, num_output_classes)

    from cntk import sequence, parameter, plus, cross_entropy_with_softmax, \
            classification_error, learning_rate_schedule, sgd, Trainer, \
            training_session, times, UnitType

    feature = sequence.input_variable(shape=(input_dim, ))
    label = C.input_variable(shape=(num_output_classes, ))
    p = parameter(shape=(input_dim, num_output_classes), init=10)
    z = times(sequence.reduce_sum(feature), p, name='z')
    ce = cross_entropy_with_softmax(z, label)
    errs = classification_error(z, label)

    lr_per_sample = learning_rate_schedule([0.3, 0.2, 0.1, 0.0],
                                           UnitType.sample)
    learner = sgd(z.parameters, lr_per_sample)
    trainer = Trainer(z, (ce, errs), [learner])
    input_map = {feature: mbs.fsi, label: mbs.lsi}

    session = training_session(trainer=trainer,
                               mb_source=mbs,
                               model_inputs_to_streams=input_map,
                               mb_size=4,
                               max_samples=20)
    session.train()

    assert trainer.total_number_of_samples_seen == 20
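
MyDataSource is not shown on this page; it comes from CNTK's io test suite. Below is a condensed sketch of a UserMinibatchSource in the same spirit; the stream names, storage formats, and the tiny fixed minibatch are illustrative assumptions, not the original implementation:

import numpy as np
from cntk import Value
from cntk.io import UserMinibatchSource, StreamInformation, MinibatchData

class MyDataSource(UserMinibatchSource):
    def __init__(self, f_dim, l_dim):
        self.f_dim, self.l_dim = f_dim, l_dim
        # Stream metadata used to map input variables to streams (mbs.fsi / mbs.lsi)
        self.fsi = StreamInformation("features", 0, 'sparse', np.float32, (f_dim,))
        self.lsi = StreamInformation("labels", 1, 'dense', np.float32, (l_dim,))
        super(MyDataSource, self).__init__()

    def stream_infos(self):
        return [self.fsi, self.lsi]

    def next_minibatch(self, num_samples, number_of_workers=1, worker_rank=0, device=None):
        # A fixed two-sample minibatch, purely for illustration
        features = Value.one_hot([[0], [1]], num_classes=self.f_dim)
        labels = Value(batch=np.eye(self.l_dim, dtype=np.float32)[[0, 1]])
        return {self.fsi: MinibatchData(features, 2, 2, True),  # value, #seqs, #samples, sweep_end
                self.lsi: MinibatchData(labels, 2, 2, True)}
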
Example #7
def test_sanitize_batch_sparse():
    batch = [csr([[1, 0, 2], [2, 3, 0]]), csr([5, 0, 1])]

    var = sequence.input_variable(3, is_sparse=True)
    b = sanitize_batch(var, batch)
    # 2 sequences, with max seq len of 2 and dimension 3
    assert b.shape == (2, 2, 3)
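
These sanitize_batch snippets rely on helpers the page does not show; presumably csr aliases scipy's CSR constructor and sanitize_batch comes from cntk.internal. The assumed setup:

from scipy.sparse import csr_matrix as csr
from cntk.internal import sanitize_batch
from cntk import sequence
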
Example #8
def test_eval_sparse_dense(tmpdir, device_id):
    from cntk import Axis
    from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs
    from cntk.ops import times

    input_vocab_dim = label_vocab_dim = 69

    ctf_data = '''\
0	|S0 3:1 |# <s>	|S1 3:1 |# <s>
0	|S0 4:1 |# A	|S1 32:1 |# ~AH
0	|S0 5:1 |# B	|S1 36:1 |# ~B
0	|S0 4:1 |# A	|S1 31:1 |# ~AE
0	|S0 7:1 |# D	|S1 38:1 |# ~D
0	|S0 12:1 |# I	|S1 47:1 |# ~IY
0	|S0 1:1 |# </s>	|S1 1:1 |# </s>
2	|S0 60:1 |# <s>	|S1 3:1 |# <s>
2	|S0 61:1 |# A	|S1 32:1 |# ~AH
'''
    ctf_file = str(tmpdir / '2seqtest.txt')
    with open(ctf_file, 'w') as f:
        f.write(ctf_data)

    mbs = MinibatchSource(CTFDeserializer(
        ctf_file,
        StreamDefs(features=StreamDef(field='S0',
                                      shape=input_vocab_dim,
                                      is_sparse=True),
                   labels=StreamDef(field='S1',
                                    shape=label_vocab_dim,
                                    is_sparse=True))),
                          randomize=False,
                          max_samples=2)

    raw_input = sequence.input_variable(shape=input_vocab_dim,
                                        sequence_axis=Axis('inputAxis'),
                                        name='raw_input',
                                        is_sparse=True)

    mb_valid = mbs.next_minibatch(minibatch_size_in_samples=100,
                                  input_map={raw_input: mbs.streams.features},
                                  device=cntk_device(device_id))

    z = times(raw_input, np.eye(input_vocab_dim))
    e_reader = z.eval(mb_valid, device=cntk_device(device_id))

    # CSR with the raw_input encoding in ctf_data
    one_hot_data = [[3, 4, 5, 4, 7, 12, 1], [60, 61]]
    data = [
        csr(np.eye(input_vocab_dim, dtype=np.float32)[d]) for d in one_hot_data
    ]
    e_csr = z.eval({raw_input: data}, device=cntk_device(device_id))
    assert np.all([np.allclose(a, b) for a, b in zip(e_reader, e_csr)])

    # One-hot with the raw_input encoding in ctf_data
    data = Value.one_hot(one_hot_data,
                         num_classes=input_vocab_dim,
                         device=cntk_device(device_id))
    e_hot = z.eval({raw_input: data}, device=cntk_device(device_id))
    assert np.all([np.allclose(a, b) for a, b in zip(e_reader, e_hot)])
Example #9
def test_one_hot_skip():
    a = Value.one_hot([[0,1,Value.ONE_HOT_SKIP]], 3)
    i = sequence.input_variable(shape=(3,))
    b = i * 1
    expected = [[[ 1.,  0.,  0.],
                 [ 0.,  1.,  0.],
                 [ 0.,  0.,  0.]]]
    assert np.allclose(b.eval({i:a}), expected)
Example #11
def test_mask(batch, seq_starts, expected):
    shape = ()
    var = sequence.input_variable(shape)
    # 'expected' is either an exception class (e.g. ValueError) or the expected mask
    if type(expected) == type(ValueError):
        with pytest.raises(expected):
            s = sanitize_batch(var, batch, seq_starts)
    else:
        s = sanitize_batch(var, batch, seq_starts)
        assert np.allclose(s.mask, expected)
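
test_mask is parametrized, but the decorator is not shown on this page. A hypothetical parametrization consistent with CNTK's mask convention (2 marks a sequence start, 1 a valid continuation, 0 padding); the tuples below are illustrations, not the original fixtures:

@pytest.mark.parametrize("batch, seq_starts, expected", [
    # two sequences (len 3 and len 1); the second continues an earlier sequence
    ([np.array([5., 6., 7.]), np.array([8.])], [True, False], [[2, 1, 1], [1, 0, 0]]),
    # seq_starts omitted: every sequence starts fresh
    ([np.array([5., 6., 7.]), np.array([8.])], None, [[2, 1, 1], [2, 0, 0]]),
    # mismatched seq_starts length should be rejected
    ([np.array([5., 6., 7.])], [True, False], ValueError),
])
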
Example #13
def test_one_hot_int_types(dtype):
    data = [[0, 2, 1], [1]]
    if dtype is not None:
        data = [np.asarray(d, dtype=dtype) for d in data]
    a = Value.one_hot(data, 3)
    i = sequence.input_variable(shape=(3, ))
    b = i * 1
    expected = [[[1., 0., 0.], [0., 0., 1.], [0., 1., 0.]], [[0., 1., 0.]]]
    for a, b in zip(b.eval({i: a}), expected):
        assert np.allclose(a, b)
Example #14
def run_distributed_training(tmpdir, create_func):

    in1 = sequence.input_variable(shape=1)
    labels = sequence.input_variable(shape=1)
    p = parameter(shape=2, init=10)
    z = plus(in1, reduce_sum(p), name='z')
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)

    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = C.learning_rate_schedule(0.007, C.UnitType.sample)
    dist_learner = create_func(
        C.momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant,
                       True))

    communicator = dist_learner.communicator()
    workers = communicator.workers()
    current_worker = communicator.current_worker()
    found_rank = False
    for wk in workers:
        if current_worker.global_rank == wk.global_rank:
            found_rank = True

    assert found_rank

    trainer = C.Trainer(z, (ce, errs), [dist_learner])
    in1_value = [[1], [2]]
    label_value = [[0], [1]]
    arguments = {in1: in1_value, labels: label_value}
    z_output = z.output
    updated, var_map = trainer.train_minibatch(arguments, outputs=[z_output])

    p = str(tmpdir / 'checkpoint.dat')
    trainer.save_checkpoint(p)
    trainer.restore_from_checkpoint(p)

    communicator.barrier()

    assert trainer.model.name == 'z'

    # Ensure that Swig is not leaking raw types
    assert isinstance(trainer.model, Function)
    assert trainer.model.__doc__
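
create_func is supplied by the caller. A hypothetical invocation, wrapping the momentum-SGD learner in CNTK's simplest distributed wrapper (the exact wrapper choice is an assumption; tmpdir is a pytest fixture here):

run_distributed_training(tmpdir, lambda learner:
    C.train.distributed.data_parallel_distributed_learner(learner))
C.train.distributed.Communicator.finalize()  # required once distributed work is done
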
def create_recurrent_network():
    # Input variables denoting the features and label data
    features = sequence.input_variable(((2*context+1)*feature_dim))
    labels = sequence.input_variable((num_classes))

    # create network
    model = Sequential([For(range(3), lambda : Recurrence(LSTM(256))),
                        Dense(num_classes)])
    z = model(features)
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)

    return {
        'feature': features,
        'label': labels,
        'ce' : ce,
        'errs' : errs,
        'output': z
    }
def test_eval_sparse_no_seq(batch_index_data, device_id):
    dim = 10
    multiplier = 2
    for var_is_sparse in [True, False]:
        in1 = sequence.input_variable(shape=(dim,), is_sparse=var_is_sparse)
        z = times(in1, multiplier*np.eye(dim))
        batch = np.eye(dim)[batch_index_data]
        expected = batch * multiplier
        sparse_val = csr(batch.astype('f'))
        result = z.eval({in1: [sparse_val]}, device=cntk_device(device_id))
        assert np.allclose(result, [expected])
Example #19
def test_sanitize_batch_contiguity():
    a1 = AA([[1, 2], [3, 4]])
    a2 = AA([[5, 6], [7, 8]])
    var = sequence.input_variable((2, 2), is_sparse=True)

    batch = [a1.T, a2.T]
    with pytest.warns(RuntimeWarning):
        b = sanitize_batch(var, batch)
        assert b.shape == (2, 1, 2, 2)

    batch = [a1, a2]
    b = sanitize_batch(var, batch)
    assert b.shape == (2, 1, 2, 2)
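
The RuntimeWarning in the first branch fires because a transposed NumPy array is a non-contiguous view, which forces a copy before CNTK can ingest it. A quick check, assuming AA = np.asarray as in CNTK's test utilities:

a1 = AA([[1, 2], [3, 4]])
print(a1.flags['C_CONTIGUOUS'], a1.T.flags['C_CONTIGUOUS'])  # True False
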
def test_eval_one_hot_seq(one_hot_batch, device_id):
    dim = 10
    multiplier = 2

    for var_is_sparse in [True, False]:
        in1 = sequence.input_variable(shape=(dim,), is_sparse=var_is_sparse)
        # Convert CNTK node value to dense so that we can compare it later
        z = times(in1, np.eye(dim)*multiplier)
        # Convert expectation to dense
        expected = [np.eye(dim)[seq]*multiplier for seq in one_hot_batch]
        batch = Value.one_hot(one_hot_batch, num_classes=dim, device=cntk_device(device_id))
        result = z.eval({in1: batch}, device=cntk_device(device_id))
        assert np.all([np.allclose(a,b) for a,b in zip(result, expected)])
Example #23
def test_times_transpose_sequence_param(device_id, precision):
    dt_precision = PRECISION_TO_TYPE[precision]

    from cntk import times_transpose, parameter, sequence, Value
    dim = 5
    num_sequences = 2
    seq = [i for i in range(dim)]
    identity = np.identity(dim, dtype=dt_precision)
    input_data = Value.one_hot([seq] * num_sequences, dim, dtype=dt_precision)
    input_var = sequence.input_variable(shape=(dim),
                                        needs_gradient=True,
                                        dtype=dt_precision)
    e = parameter(shape=(dim, ), init=1, dtype=dt_precision)
    z = times_transpose(e, input_var)
    e_grad = z.grad({input_var: input_data}, [e, input_var])
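    # No assertion in the original: this is a smoke test checking that grad()
    # accepts a parameter and a one-hot sequence input in the same call.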
def test_op_times_sparse_grad(device_id, precision):
    dt_precision = PRECISION_TO_TYPE[precision]

    from cntk import times, times_transpose, parameter, reshape, Value, sequence
    dim = 5
    num_sequences = 2
    seq = [i for i in range(dim)]
    identity = np.identity(dim, dtype=dt_precision)
    input_data = Value.one_hot([seq]*num_sequences, dim, dtype=dt_precision)
    input_var  = sequence.input_variable(shape=(dim), is_sparse=True, needs_gradient=False, dtype=dt_precision)
    e = parameter(shape = (dim, dim), init = identity, dtype=dt_precision)
    z = reshape(times_transpose(e, times(input_var, e)), dim)
    e_grad = z.grad({input_var : input_data}, [e])
    
    assert np.allclose(e_grad, np.ones((dim,dim))*4)
def test_eval_sparse_seq_1(batch, device_id):
    dim = 4
    multiplier = 2
    for var_is_sparse in [True, False]:
        in1 = sequence.input_variable(shape=(dim,), is_sparse=var_is_sparse)
        z = times(in1, multiplier*np.eye(dim))
        if isinstance(batch[0], list):
            expected = [np.vstack([m.todense() * multiplier for m in seq])
                        for seq in batch]
        else:
            expected = [seq.todense() * multiplier for seq in batch]
        result = z.eval({in1: batch}, device=cntk_device(device_id))

        assert np.all([np.allclose(a,b) for a,b in zip(result, expected)]), \
                "%s != %s"%(result,expected)
Example #27
def train_sequence_classifier():
    input_dim = 2000
    cell_dim = 25
    hidden_dim = 25
    embedding_dim = 50
    num_output_classes = 5

    # Input variables denoting the features and label data
    features = sequence.input_variable(shape=input_dim, is_sparse=True)
    label = input_variable(num_output_classes)

    # Instantiate the sequence classification model
    classifier_output = LSTM_sequence_classifier_net(features,
                                                     num_output_classes,
                                                     embedding_dim, hidden_dim,
                                                     cell_dim)

    ce = cross_entropy_with_softmax(classifier_output, label)
    pe = classification_error(classifier_output, label)

    rel_path = ("../../../Tests/EndToEndTests/Text/" +
                "SequenceClassification/Data/Train.ctf")
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path)

    reader = create_reader(path, True, input_dim, num_output_classes)

    input_map = {
        features: reader.streams.features,
        label: reader.streams.labels
    }

    lr_per_sample = learning_rate_schedule(0.0005, UnitType.sample)
    # Instantiate the trainer object to drive the model training
    progress_printer = ProgressPrinter(0)
    trainer = Trainer(classifier_output, (ce, pe),
                      sgd(classifier_output.parameters, lr=lr_per_sample),
                      progress_printer)

    # Get minibatches of sequences to train with and perform model training
    minibatch_size = 200

    for i in range(255):
        mb = reader.next_minibatch(minibatch_size, input_map=input_map)
        trainer.train_minibatch(mb)

    evaluation_average = float(trainer.previous_minibatch_evaluation_average)
    loss_average = float(trainer.previous_minibatch_loss_average)
    return evaluation_average, loss_average
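
LSTM_sequence_classifier_net is defined elsewhere in CNTK's SequenceClassification example. A sketch in that spirit (embedding, recurrent LSTM, last-step readout, dense projection); treat the exact layer arguments as assumptions:

from cntk.layers import Dense, Embedding, LSTM, Recurrence, Sequential
from cntk import sequence

def LSTM_sequence_classifier_net(input, num_output_classes, embedding_dim,
                                 LSTM_dim, cell_dim):
    classifier = Sequential([Embedding(embedding_dim),
                             Recurrence(LSTM(LSTM_dim, cell_dim)),
                             sequence.last,
                             Dense(num_output_classes)])
    return classifier(input)
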
Example #28
def test_usermbsource_training(tmpdir, with_checkpoint_impl):
    input_dim = 1000
    num_output_classes = 5

    mbs = MyDataSource(input_dim, num_output_classes)
    # Using this for testing the UserMinibatchSource checkpointing
    if with_checkpoint_impl:
        MBS_CV_CLASS = MyDataSourceWithCheckpoint
    else:
        MBS_CV_CLASS = MyDataSource

    mbs_cv = MBS_CV_CLASS(input_dim, num_output_classes)

    from cntk import sequence, parameter, plus, cross_entropy_with_softmax, \
            classification_error, learning_rate_schedule, sgd, Trainer, \
            training_session, times, UnitType

    feature = sequence.input_variable(shape=(input_dim,))
    label = C.input_variable(shape=(num_output_classes,))
    p = parameter(shape=(input_dim, num_output_classes), init=10)
    z = times(sequence.reduce_sum(feature), p, name='z')
    ce = cross_entropy_with_softmax(z, label)
    errs = classification_error(z, label)

    # Use a large learning rate so the model does not converge before all the intended
    # samples are fed; note that the training session can end early if there are no updates.
    lr_per_sample = learning_rate_schedule(0.3, UnitType.sample)
    learner = sgd(z.parameters, lr_per_sample)
    trainer = Trainer(z, (ce, errs), [learner])
    input_map = {
        feature: mbs.fsi,
        label: mbs.lsi
    }

    session = training_session(
        trainer=trainer, mb_source=mbs,
        model_inputs_to_streams=input_map,
        mb_size=4, max_samples=20,
        cv_config = C.CrossValidationConfig(minibatch_source=mbs_cv, max_samples=10,
            minibatch_size=2)
    )
    session.train()

    assert trainer.total_number_of_samples_seen == 20
    if with_checkpoint_impl:
        assert mbs_cv._restore_from_checkpoint_calls == 1
Example #29
def train_sequence_classifier():
    input_dim = 2000
    cell_dim = 25
    hidden_dim = 25
    embedding_dim = 50
    num_output_classes = 5

    # Input variables denoting the features and label data
    features = sequence.input_variable(shape=input_dim, is_sparse=True)
    label = input_variable(num_output_classes)

    # Instantiate the sequence classification model
    classifier_output = LSTM_sequence_classifier_net(
        features, num_output_classes, embedding_dim, hidden_dim, cell_dim)

    ce = cross_entropy_with_softmax(classifier_output, label)
    pe = classification_error(classifier_output, label)

    rel_path = ("../../../Tests/EndToEndTests/Text/" +
                "SequenceClassification/Data/Train.ctf")
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path)

    reader = create_reader(path, True, input_dim, num_output_classes)

    input_map = {
            features: reader.streams.features,
            label:    reader.streams.labels
    }

    lr_per_sample = learning_parameter_schedule_per_sample(0.0005)
    # Instantiate the trainer object to drive the model training
    progress_printer = ProgressPrinter(0)
    trainer = Trainer(classifier_output, (ce, pe),
                      sgd(classifier_output.parameters, lr=lr_per_sample),
                      progress_printer)

    # Get minibatches of sequences to train with and perform model training
    minibatch_size = 200

    for i in range(255):
        mb = reader.next_minibatch(minibatch_size, input_map=input_map)
        trainer.train_minibatch(mb)

    evaluation_average = float(trainer.previous_minibatch_evaluation_average)
    loss_average = float(trainer.previous_minibatch_loss_average)
    return evaluation_average, loss_average
Example #30
def test_usermbsource_training(tmpdir, with_checkpoint_impl):
    input_dim = 1000
    num_output_classes = 5

    mbs = MyDataSource(input_dim, num_output_classes)
    # Using this for testing the UserMinibatchSource checkpointing
    if with_checkpoint_impl:
        MBS_CV_CLASS = MyDataSourceWithCheckpoint
    else:
        MBS_CV_CLASS = MyDataSource

    mbs_cv = MBS_CV_CLASS(input_dim, num_output_classes)

    from cntk import sequence, parameter, plus, cross_entropy_with_softmax, \
            classification_error, learning_parameter_schedule_per_sample, sgd, Trainer, \
            training_session, times

    feature = sequence.input_variable(shape=(input_dim,))
    label = C.input_variable(shape=(num_output_classes,))
    p = parameter(shape=(input_dim, num_output_classes), init=10)
    z = times(sequence.reduce_sum(feature), p, name='z')
    ce = cross_entropy_with_softmax(z, label)
    errs = classification_error(z, label)

    # Use a large learning rate so the model does not converge before all the intended
    # samples are fed; note that the training session can end early if there are no updates.
    lr_per_sample = learning_parameter_schedule_per_sample(0.3)
    learner = sgd(z.parameters, lr_per_sample)
    trainer = Trainer(z, (ce, errs), [learner])
    input_map = {
        feature: mbs.fsi,
        label: mbs.lsi
    }

    session = training_session(
        trainer=trainer, mb_source=mbs,
        model_inputs_to_streams=input_map,
        mb_size=4, max_samples=20,
        cv_config = C.CrossValidationConfig(minibatch_source=mbs_cv, max_samples=10,
            minibatch_size=2)
    )
    session.train()

    assert trainer.total_number_of_samples_seen == 20
    if with_checkpoint_impl:
        assert mbs_cv._restore_from_checkpoint_calls == 1
Example #31
def test_sweep_based_schedule(tmpdir, device_id):
    from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs
    from cntk import cross_entropy_with_softmax, classification_error, plus, reduce_sum, sequence
    from cntk import Trainer

    input_dim = 69

    ctf_data = '''\
0   |S0 3:1   |S1 3:1 |# <s>
0   |S0 4:1 |# A    |S1 32:1 |# ~AH
0   |S0 5:1 |# B    |S1 36:1 |# ~B
0   |S0 4:1 |# A    |S1 31:1 |# ~AE
0   |S0 7:1 |# D    |S1 38:1 |# ~D
0   |S0 12:1 |# I   |S1 47:1 |# ~IY
0   |S0 1:1 |# </s> |S1 1:1 |# </s>
2   |S0 60:1 |# <s> |S1 3:1 |# <s>
2   |S0 61:1 |# A   |S1 32:1 |# ~AH
'''
    ctf_file = str(tmpdir/'2seqtest.txt')
    with open(ctf_file, 'w') as f:
        f.write(ctf_data)

    mbs = MinibatchSource(CTFDeserializer(ctf_file, StreamDefs(
        features  = StreamDef(field='S0', shape=input_dim,  is_sparse=True),
        labels    = StreamDef(field='S1', shape=input_dim,  is_sparse=True)
    )), randomize=False)

    in1 = sequence.input_variable(shape=(input_dim,))
    labels = sequence.input_variable(shape=(input_dim,))
    p = parameter(shape=(input_dim,), init=10)
    z = plus(in1, reduce_sum(p), name='z')
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)

    lr_per_sample = learning_rate_schedule([0.3, 0.2, 0.1, 0.0], UnitType.sample)
    learner = sgd(z.parameters, lr_per_sample)
    trainer = Trainer(z, (ce, errs), [learner])

    input_map = {
        in1       : mbs.streams.features,
        labels : mbs.streams.labels
    }

    # fetch minibatch (first sequence)
    data = mbs.next_minibatch(1, input_map=input_map) 
    trainer.train_minibatch(data)
    assert learner.learning_rate() == 0.3

    # fetch minibatch (second sequence, sweep ends at this point)
    data = mbs.next_minibatch(1, input_map=input_map)
    trainer.train_minibatch(data)
    assert learner.learning_rate() == 0.2

    # fetch minibatch (both sequences -- entire sweep in one go)
    data = mbs.next_minibatch(9, input_map=input_map)
    trainer.train_minibatch(data)
    assert learner.learning_rate() == 0.1

    # fetch minibatch (multiple sweeps)
    data = mbs.next_minibatch(30, input_map=input_map)
    trainer.train_minibatch(data, outputs=[z.output])
    assert learner.learning_rate() == 0.0
Example #32
def test_distributed_mb_source(tmpdir):
    input_dim = 69

    ctf_data = '''\
0	|S0 3:1 |# <s>	|S1 3:1 |# <s>
0	|S0 4:1 |# A	|S1 32:1 |# ~AH
0	|S0 5:1 |# B	|S1 36:1 |# ~B
0	|S0 4:1 |# A	|S1 31:1 |# ~AE
0	|S0 7:1 |# D	|S1 38:1 |# ~D
0	|S0 12:1 |# I	|S1 47:1 |# ~IY
0	|S0 1:1 |# </s>	|S1 1:1 |# </s>
2	|S0 60:1 |# <s>	|S1 3:1 |# <s>
2	|S0 61:1 |# A	|S1 32:1 |# ~AH
2	|S0 61:1 |# A	|S1 32:1 |# ~AH
3	|S0 60:1 |# <s>	|S1 3:1 |# <s>
3	|S0 61:1 |# A	|S1 32:1 |# ~AH
3	|S0 61:1 |# A	|S1 32:1 |# ~AH
3	|S0 61:1 |# A	|S1 32:1 |# ~AH
4	|S0 60:1 |# <s>	|S1 3:1 |# <s>
5	|S0 60:1 |# <s>	|S1 3:1 |# <s>
5	|S0 61:1 |# A	|S1 32:1 |# ~AH
6	|S0 60:1 |# <s>	|S1 3:1 |# <s>
6	|S0 61:1 |# A	|S1 32:1 |# ~AH
7	|S0 60:1 |# <s>	|S1 3:1 |# <s>
8	|S0 60:1 |# <s>	|S1 3:1 |# <s>
8	|S0 61:1 |# A	|S1 32:1 |# ~AH
9	|S0 60:1 |# <s>	|S1 3:1 |# <s>
9	|S0 61:1 |# A	|S1 32:1 |# ~AH
10	|S0 61:1 |# A	|S1 32:1 |# ~AH
'''
    from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs

    ctf_file = str(tmpdir / '2seqtest.txt')
    with open(ctf_file, 'w') as f:
        f.write(ctf_data)

    # No randomization

    mb0 = MinibatchSource(CTFDeserializer(
        ctf_file,
        StreamDefs(features=StreamDef(field='S0',
                                      shape=input_dim,
                                      is_sparse=True),
                   labels=StreamDef(field='S1',
                                    shape=input_dim,
                                    is_sparse=True))),
                          randomize=False,
                          max_samples=36)  # A bit more than a sweep
    mb1 = MinibatchSource(CTFDeserializer(
        ctf_file,
        StreamDefs(features=StreamDef(field='S0',
                                      shape=input_dim,
                                      is_sparse=True),
                   labels=StreamDef(field='S1',
                                    shape=input_dim,
                                    is_sparse=True))),
                          randomize=False,
                          max_samples=36)  # A bit more than a sweep
    input = sequence.input_variable(shape=(input_dim, ))
    label = sequence.input_variable(shape=(input_dim, ))
    input_map = {input: mb0.streams.features, label: mb0.streams.labels}

    # Because we are emulating two workers here, minibatch_size_in_samples will be
    # split in two, so below we expect 5 samples per worker.
    data = mb0.next_minibatch(minibatch_size_in_samples=10,
                              input_map=input_map,
                              num_data_partitions=2,
                              partition_index=0)
    assert (data[input].num_samples == 7)  # Sequence 0

    data = mb0.next_minibatch(minibatch_size_in_samples=10,
                              input_map=input_map,
                              num_data_partitions=2,
                              partition_index=0)
    assert (data[input].num_samples == 4)  # Sequence 3

    data = mb0.next_minibatch(minibatch_size_in_samples=10,
                              input_map=input_map,
                              num_data_partitions=2,
                              partition_index=0)
    assert (data[input].num_samples == 5)  # Sequences 5, 7, 9

    data = mb0.next_minibatch(minibatch_size_in_samples=10,
                              input_map=input_map,
                              num_data_partitions=2,
                              partition_index=0)
    assert (data[input].num_samples == 7)  # Sequence 0

    data = mb0.next_minibatch(minibatch_size_in_samples=10,
                              input_map=input_map,
                              num_data_partitions=2,
                              partition_index=0)
    assert (data[input].num_samples == 4)  # Sequence 3

    data = mb0.next_minibatch(minibatch_size_in_samples=10,
                              input_map=input_map,
                              num_data_partitions=2,
                              partition_index=0)
    assert (len(data) == 0)  # No data

    data = mb1.next_minibatch(minibatch_size_in_samples=10,
                              input_map=input_map,
                              num_data_partitions=2,
                              partition_index=1)
    assert (data[input].num_samples == 4)  # Sequences 2, 4

    data = mb1.next_minibatch(minibatch_size_in_samples=10,
                              input_map=input_map,
                              num_data_partitions=2,
                              partition_index=1)
    assert (data[input].num_samples == 5)  # Sequences 6, 8, 10

    data = mb1.next_minibatch(minibatch_size_in_samples=10,
                              input_map=input_map,
                              num_data_partitions=2,
                              partition_index=1)
    assert (data[input].num_samples == 3)  # Sequences 2

    data = mb1.next_minibatch(minibatch_size_in_samples=10,
                              input_map=input_map,
                              num_data_partitions=2,
                              partition_index=1)
    assert (len(data) == 0)  # No data

    # Randomization

    mb3 = MinibatchSource(CTFDeserializer(
        ctf_file,
        StreamDefs(features=StreamDef(field='S0',
                                      shape=input_dim,
                                      is_sparse=True),
                   labels=StreamDef(field='S1',
                                    shape=input_dim,
                                    is_sparse=True))),
                          max_sweeps=1)

    mb4 = MinibatchSource(CTFDeserializer(
        ctf_file,
        StreamDefs(features=StreamDef(field='S0',
                                      shape=input_dim,
                                      is_sparse=True),
                   labels=StreamDef(field='S1',
                                    shape=input_dim,
                                    is_sparse=True))),
                          max_sweeps=1)

    data = mb3.next_minibatch(minibatch_size_in_samples=10,
                              input_map=input_map,
                              num_data_partitions=2,
                              partition_index=0)
    assert (data[input].num_samples == 5)

    data = mb3.next_minibatch(minibatch_size_in_samples=10,
                              input_map=input_map,
                              num_data_partitions=2,
                              partition_index=0)
    assert (data[input].num_samples == 4)

    data = mb3.next_minibatch(minibatch_size_in_samples=10,
                              input_map=input_map,
                              num_data_partitions=2,
                              partition_index=0)
    assert (data[input].num_samples == 4)

    data = mb3.next_minibatch(minibatch_size_in_samples=10,
                              input_map=input_map,
                              num_data_partitions=2,
                              partition_index=0)
    assert (data[input].num_samples == 5)

    data = mb3.next_minibatch(minibatch_size_in_samples=10,
                              input_map=input_map,
                              num_data_partitions=2,
                              partition_index=0)
    assert (data[input].num_samples == 7)

    data = mb4.next_minibatch(minibatch_size_in_samples=10,
                              input_map=input_map,
                              num_data_partitions=2,
                              partition_index=1)
    assert (len(data) == 0)  # Due to chunking we do not expect any data for rank 1