Example #1
def test_cos_distance_backward():
    x = sequence.input(shape=(2, ),
                       sequence_axis=Axis("B"),
                       needs_gradient=True)
    y = sequence.input(shape=(2, ),
                       sequence_axis=Axis("B"),
                       needs_gradient=True)
    z = cosine_distance(x, y)
    a = np.reshape(np.float32([0.25, 0.5, 0.1, 1]), (1, 2, 2))
    b = np.reshape(np.float32([-0.5, 1.5, -0.3, -1]), (1, 2, 2))
    bwd, fwd = z.forward({x: a, y: b}, [z.output], set([z.output]))
    value = list(fwd.values())[0]
    expected = [[0.707107, -0.981665]]
    assert np.allclose(value, expected)
    grad = z.backward(bwd, {z.output: np.ones_like(value)}, set([x, y]))
    x_driv_expected = np.ndarray(
        (1, 2, 2),
        dtype=np.float32,
        buffer=np.float32([-1.131371, 0.565686, -0.188727, 0.018873]))
    y_driv_expected = np.ndarray(
        (1, 2, 2),
        dtype=np.float32,
        buffer=np.float32([0.424264, 0.141421, -0.174876, 0.052463]))
    assert (np.all(np.absolute(grad[x] - x_driv_expected) < 1e-6))
    assert (np.all(np.absolute(grad[y] - y_driv_expected) < 1e-6))
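
For reference, the expected forward values can be reproduced in plain NumPy, assuming cosine_distance here means per-step cosine similarity (which is what the test's numbers imply):

import numpy as np

def np_cosine(u, v):
    # per-step cosine similarity
    return np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v))

a = np.float32([[0.25, 0.5], [0.1, 1]])
b = np.float32([[-0.5, 1.5], [-0.3, -1]])
print([np_cosine(u, v) for u, v in zip(a, b)])  # ~[0.707107, -0.981665]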
Example #2
def test_op_times_reduce_sequence_axis(device_id, precision):
    dt_precision = PRECISION_TO_TYPE[precision]

    from cntk import times, Value, TIMES_REDUCE_SEQUENCE_AXIS_WITHOUT_INFERRED_INPUT_RANK
    from cntk import sequence
    dim = 10
    seq = [[0,1,2], [3], [4,5,6,7,8,9]]
    right_data = Value.one_hot(seq, dim, dtype=dt_precision)
    right_var = sequence.input(shape=(dim), is_sparse=True, dtype=dt_precision)
    left_data = [AA([1,1,1],dtype=dt_precision), AA([1],dtype=dt_precision), AA([1,1,1,1,1,1],dtype=dt_precision)]
    left_var = sequence.input(shape=(1), dtype=dt_precision)

    func = times(left_var, right_var, infer_input_rank_to_map=TIMES_REDUCE_SEQUENCE_AXIS_WITHOUT_INFERRED_INPUT_RANK)
    func2 = sequence.reduce_sum(times(left_var, right_var))

    assert func.dynamic_axes == func2.dynamic_axes

    _, forward_output = func.forward({left_var:left_data, right_var:right_data})
    
    actual_forward = forward_output[func.output]

    expected_forward = AA([[[1,1,1,0,0,0,0,0,0,0]],
                           [[0,0,0,1,0,0,0,0,0,0]],
                           [[0,0,0,0,1,1,1,1,1,1]]])
    
    assert np.allclose(actual_forward, expected_forward)
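
The expected_forward values are easy to sanity-check in NumPy: with all-ones weights, multiplying one-hot rows and reducing over the sequence axis is just a histogram of the indices in each sequence. A sketch reusing dim and seq from above:

import numpy as np

dim = 10
seq = [[0, 1, 2], [3], [4, 5, 6, 7, 8, 9]]
# one row per sequence: the sum of its one-hot vectors
expected = [np.eye(dim)[s].sum(axis=0, keepdims=True) for s in seq]
print(expected[0])  # [[1. 1. 1. 0. 0. 0. 0. 0. 0. 0.]]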
Example #3
def test_model_not_criterion_subset():
    input_dim = 2
    proj_dim = 11
    model1_dim = 3
    model2_dim = 4
    x = sequence.input((input_dim, ))

    core = Embedding(proj_dim)
    model1 = Dense(model1_dim)(sequence.last(core(x)))
    model1_label = input((model1_dim, ))
    ce_model1 = cross_entropy_with_softmax(model1, model1_label)
    pe_model1 = classification_error(model1, model1_label)

    model2 = Dense(model2_dim)(core(x))
    model2_label = sequence.input((model2_dim, ))
    ce_model2 = cross_entropy_with_softmax(model2, model2_label)
    pe_model2 = classification_error(model2, model2_label)

    ce = 0.5 * sequence.reduce_sum(ce_model2) + 0.5 * ce_model1

    lr_schedule = learning_rate_schedule(0.003, UnitType.sample)
    trainer_multitask = Trainer(model1, (ce, pe_model1),
                                sgd(ce.parameters, lr=lr_schedule))

    x_data = np.asarray([[2., 1.], [1., 2.]], np.float32)
    model1_label_data = np.asarray([1., 0., 0.], np.float32)
    model2_label_data = np.asarray([[0., 1., 0., 0.], [0., 0., 0., 1.]],
                                   np.float32)
    trainer_multitask.train_minibatch({
        x: [x_data],
        model1_label: [model1_label_data],
        model2_label: [model2_label_data]
    })
Example #4
def test_cosine_distance_with_negative_samples():
    a = np.array(
        [[1., 1., 0., 0., 0.], [0., 1., 1., 0., 0.], [0., 0., 1., 1., 0.],
         [0., 0., 0., 1., 1.], [1., 0., 0., 0., 1.]],
        dtype=np.float32)
    b = np.array(
        [[1., 1., 0., 0., 0.], [0., 1., 1., 0., 0.], [0., 0., 1., 1., 0.],
         [0., 0., 0., 1., 1.], [1., 0., 0., 0., 1.]],
        dtype=np.float32)

    qry = sequence.input(shape=(5))
    doc = sequence.input(shape=(5))
    num_neg_samples = 2
    model = cosine_distance_with_negative_samples(
        qry, doc, shift=1, num_negative_samples=num_neg_samples)
    result = model.eval({qry: [a], doc: [b]})

    # We expect one row per step of the (single) input sequence
    assert len(result[0]) == a.shape[0]

    # We expect the number of columns to be the number of negative samples + 1
    assert result[0].shape[1] == num_neg_samples + 1

    # The first value is an exact match, the second shares only one element,
    # and the last shares none
    assert np.allclose(result[0], np.tile([1, 0.5, 0.], (a.shape[0], 1)))
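
The [1, 0.5, 0] pattern follows from how the rows above are built; a quick NumPy check (construction mine, equivalent to the literals in the test):

import numpy as np

def cos(u, v):
    return np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v))

# same circulant rows as the literals above: each row shares one element
# with the next row and none with the one after that
a = np.eye(5) + np.roll(np.eye(5), 1, axis=1)
print(cos(a[0], a[0]), cos(a[0], a[1]), cos(a[0], a[2]))  # 1.0 0.5 0.0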
Example #5
def test_rank0_output():
  x = sequence.input(shape=(768,), sequence_axis=Axis("B"), needs_gradient=True)
  y = sequence.input(shape=(768,), sequence_axis=Axis("B"), needs_gradient=True)
  z = cosine_distance(x, y)
  batch_num = 2
  batch_size = 30
  a = np.float32(np.random.rand(batch_num*batch_size,1500,768))
  b = np.float32(np.random.rand(batch_num*batch_size,1500,768))
  for i in range(batch_num):
    bwd, fwd = z.forward({x:a[i*batch_size:(i+1)*batch_size], y:b[i*batch_size:(i+1)*batch_size]}, [z.output], set([z.output]))
    grad = z.backward(bwd, {z.output:np.ones_like(fwd[z.output])}, set([x, y]))
Example #6
def create_sample_model(device, writer=None):
    in1 = sequence.input(shape=(input_dim, ))
    labels = sequence.input(shape=(input_dim, ))
    p = parameter(shape=(input_dim, ), init=10, device=device)
    z = plus(in1, reduce_sum(p), name='z')
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)

    lr_per_sample = learning_rate_schedule([0.3, 0.2, 0.1, 0.0],
                                           UnitType.sample)
    learner = sgd(z.parameters, lr_per_sample)
    trainer = Trainer(z, (ce, errs), [learner], writer)
    return (trainer, in1, labels)
Example #7
def test_usermbsource_training(tmpdir):
    input_dim = 1000
    num_output_classes = 5

    mbs = MyDataSource(input_dim, num_output_classes)

    from cntk import sequence, parameter, plus, cross_entropy_with_softmax, \
            classification_error, learning_rate_schedule, sgd, Trainer, \
            training_session, times, UnitType, input

    feature = sequence.input(shape=(input_dim, ))
    label = input(shape=(num_output_classes, ))
    p = parameter(shape=(input_dim, num_output_classes), init=10)
    z = times(sequence.reduce_sum(feature), p, name='z')
    ce = cross_entropy_with_softmax(z, label)
    errs = classification_error(z, label)

    lr_per_sample = learning_rate_schedule([0.3, 0.2, 0.1, 0.0],
                                           UnitType.sample)
    learner = sgd(z.parameters, lr_per_sample)
    trainer = Trainer(z, (ce, errs), [learner])
    input_map = {feature: mbs.fsi, label: mbs.lsi}

    session = training_session(trainer=trainer,
                               mb_source=mbs,
                               model_inputs_to_streams=input_map,
                               mb_size=4,
                               max_samples=20)
    session.train()

    assert trainer.total_number_of_samples_seen == 20
Example #8
def test_not_replaced_placeholders():
    def wrap_in_block(fun_args, name):
        # placeholders inside the BlockFunction
        block_args = [placeholder(name=arg.name) for arg in fun_args]
        # the content of the BlockFunction
        combined_block_args = combine(block_args)
        # after wrapping, the block_args map to args
        arg_map = list(zip(block_args, fun_args))
        combined_args = as_block(composite=combined_block_args,
                                 block_arguments_map=arg_map,
                                 block_op_name=name)
        return combined_args

    input_dim = 2
    x = sequence.input(shape=(input_dim, ))
    p1 = placeholder()
    p2 = placeholder()

    a = abs(x)
    b = wrap_in_block(list(a.outputs) + [p1], "my_first_block")
    b = wrap_in_block(list(b.outputs) + [p2], "my_second_block")
    b = past_value(b.outputs[0])

    model = b.replace_placeholders({p1: b.outputs[0], p2: b.outputs[0]})

    x0 = [[1, 1], [2, 2]]
    with pytest.raises(RuntimeError):
        model.forward({x: x0}, model.outputs)
Example #9
def test_sanitize_batch_sparse():
    batch = [csr([[1, 0, 2], [2, 3, 0]]), csr([5, 0, 1])]

    var = sequence.input(3, is_sparse=True)
    b = sanitize_batch(var, batch)
    # 2 sequences, with max seq len of 2 and dimension 3
    assert b.shape == (2, 2, 3)
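
For illustration, a dense picture of what sanitize_batch builds here: the shorter sequence is padded up to the longest one, and a mask (not shown) marks which steps are real. A NumPy sketch, not the actual Value layout API:

import numpy as np

dense = np.zeros((2, 2, 3), dtype=np.float32)
dense[0] = [[1, 0, 2], [2, 3, 0]]  # sequence 1: both steps valid
dense[1, 0] = [5, 0, 1]            # sequence 2: second step stays zero padding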
Example #10
def test_eval_sparse_dense(tmpdir, device_id):
    from cntk import Axis
    from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs
    from cntk.ops import input, times

    input_vocab_dim = label_vocab_dim = 69

    ctf_data = '''\
0	|S0 3:1 |# <s>	|S1 3:1 |# <s>
0	|S0 4:1 |# A	|S1 32:1 |# ~AH
0	|S0 5:1 |# B	|S1 36:1 |# ~B
0	|S0 4:1 |# A	|S1 31:1 |# ~AE
0	|S0 7:1 |# D	|S1 38:1 |# ~D
0	|S0 12:1 |# I	|S1 47:1 |# ~IY
0	|S0 1:1 |# </s>	|S1 1:1 |# </s>
2	|S0 60:1 |# <s>	|S1 3:1 |# <s>
2	|S0 61:1 |# A	|S1 32:1 |# ~AH
'''
    ctf_file = str(tmpdir / '2seqtest.txt')
    with open(ctf_file, 'w') as f:
        f.write(ctf_data)

    mbs = MinibatchSource(CTFDeserializer(
        ctf_file,
        StreamDefs(features=StreamDef(field='S0',
                                      shape=input_vocab_dim,
                                      is_sparse=True),
                   labels=StreamDef(field='S1',
                                    shape=label_vocab_dim,
                                    is_sparse=True))),
                          randomize=False,
                          epoch_size=2)

    raw_input = sequence.input(shape=input_vocab_dim,
                               sequence_axis=Axis('inputAxis'),
                               name='raw_input',
                               is_sparse=True)

    mb_valid = mbs.next_minibatch(minibatch_size_in_samples=100,
                                  input_map={raw_input: mbs.streams.features},
                                  device=cntk_device(device_id))

    z = times(raw_input, np.eye(input_vocab_dim))
    e_reader = z.eval(mb_valid, device=cntk_device(device_id))

    # CSR with the raw_input encoding in ctf_data
    one_hot_data = [[3, 4, 5, 4, 7, 12, 1], [60, 61]]
    data = [
        csr(np.eye(input_vocab_dim, dtype=np.float32)[d]) for d in one_hot_data
    ]
    e_csr = z.eval({raw_input: data}, device=cntk_device(device_id))
    assert np.all([np.allclose(a, b) for a, b in zip(e_reader, e_csr)])

    # One-hot with the raw_input encoding in ctf_data
    data = Value.one_hot(one_hot_data,
                         num_classes=input_vocab_dim,
                         device=cntk_device(device_id))
    e_hot = z.eval({raw_input: data}, device=cntk_device(device_id))
    assert np.all([np.allclose(a, b) for a, b in zip(e_reader, e_hot)])
Example #11
def test_cos_distance_backward2():
  x = sequence.input(shape=(100,), sequence_axis=Axis("B"), needs_gradient=True)
  y = sequence.input(shape=(100,), sequence_axis=Axis("B"), needs_gradient=True)
  z = cosine_distance(x, y)
  np.random.seed(0)
  a = np.float32(np.random.rand(10,50,100))
  b = np.float32(np.random.rand(10,50,100))
  bwd, fwd = z.forward({x:a, y:b}, [z.output], set([z.output]))
  value = list(fwd.values())[0]
  expected_cos = numpy_cos(a,b)
  expected = expected_cos.forward()
  assert np.allclose(value, expected)
  grad = z.backward(bwd, {z.output:np.ones_like(value)}, set([x, y]))
  bwd = expected_cos.backward()
  x_driv_expected = bwd['a']
  y_driv_expected = bwd['b']
  assert (np.all(np.absolute(grad[x]-x_driv_expected) < 1e-6))
  assert (np.all(np.absolute(grad[y]-y_driv_expected) < 1e-6))
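
numpy_cos is a reference helper defined elsewhere in the test module. A minimal sketch of the math such a helper has to implement (per-step cosine forward and its analytic gradient; derivation mine):

import numpy as np

def np_cos_forward(a, b):
    # cosine similarity along the last axis
    return (a * b).sum(-1) / (np.linalg.norm(a, axis=-1) * np.linalg.norm(b, axis=-1))

def np_cos_grad_a(a, b):
    # d cos(a, b) / da = b / (|a||b|) - cos(a, b) * a / |a|^2
    na = np.linalg.norm(a, axis=-1, keepdims=True)
    nb = np.linalg.norm(b, axis=-1, keepdims=True)
    c = (a * b).sum(-1, keepdims=True) / (na * nb)
    return b / (na * nb) - c * a / na ** 2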
Example #12
def test_mask(batch, seq_starts, expected):
    shape = ()
    var = sequence.input(shape)
    if isinstance(expected, type) and issubclass(expected, Exception):
        with pytest.raises(expected):
            s = sanitize_batch(var, batch, seq_starts)
    else:
        s = sanitize_batch(var, batch, seq_starts)
        assert np.allclose(s.mask, expected)
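
The test is driven by a pytest parametrization not shown in this snippet. An illustrative case (values mine, assuming CNTK's mask convention of 2 = sequence start, 1 = within sequence, 0 = padding):

# two sequences of lengths 2 and 1; the second continues an earlier sequence
# (seq_starts[1] is False), so its first step is not marked as a start
batch = [[1, 2], [3]]
seq_starts = [True, False]
expected = [[2, 1],
            [1, 0]]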
Example #13
def run_distributed_training(tmpdir, create_func):

    in1 = sequence.input(shape=1)
    labels = sequence.input(shape=1)
    p = parameter(shape=2, init=10)
    z = plus(in1, reduce_sum(p), name='z')
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)

    momentum_time_constant = momentum_as_time_constant_schedule(1100)
    lr_per_sample = learning_rate_schedule(0.007, UnitType.sample)
    dist_learner = create_func(
        momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant,
                     True))

    communicator = dist_learner.communicator()
    workers = communicator.workers()
    current_worker = communicator.current_worker()
    found_rank = False
    for wk in workers:
        if current_worker.global_rank == wk.global_rank:
            found_rank = True

    assert found_rank

    trainer = Trainer(z, (ce, errs), [dist_learner])
    in1_value = [[1], [2]]
    label_value = [[0], [1]]
    arguments = {in1: in1_value, labels: label_value}
    z_output = z.output
    updated, var_map = trainer.train_minibatch(arguments, outputs=[z_output])

    p = str(tmpdir / 'checkpoint.dat')
    trainer.save_checkpoint(p)
    trainer.restore_from_checkpoint(p)

    communicator.barrier()

    assert trainer.model.name == 'z'

    # Ensure that Swig is not leaking raw types
    assert isinstance(trainer.model, Function)
    assert trainer.model.__doc__
Example #14
def create_recurrent_network():
    # Input variables denoting the features and label data
    features = sequence.input(((2*context+1)*feature_dim))
    labels = sequence.input((num_classes))

    # create network
    model = Sequential([For(range(3), lambda : Recurrence(LSTM(256))),
                        Dense(num_classes)])
    z = model(features)
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)

    return {
        'feature': features,
        'label': labels,
        'ce' : ce,
        'errs' : errs,
        'output': z
    }
Example #15
def test_one_hot_int_types(dtype):
    data = [[0, 2, 1], [1]]
    if dtype is not None:
        data = [np.asarray(d, dtype=dtype) for d in data]
    a = Value.one_hot(data, 3)
    i = sequence.input(shape=(3, ))
    b = i * 1
    expected = [[[1., 0., 0.], [0., 0., 1.], [0., 1., 0.]], [[0., 1., 0.]]]
    for actual, exp in zip(b.eval({i: a}), expected):
        assert np.allclose(actual, exp)
Example #16
def test_eval_sparse_no_seq(batch_index_data, device_id):
    dim = 10
    multiplier = 2
    for var_is_sparse in [True, False]:
        in1 = sequence.input(shape=(dim, ), is_sparse=var_is_sparse)
        z = times(in1, multiplier * np.eye(dim))
        batch = np.eye(dim)[batch_index_data]
        expected = batch * multiplier
        sparse_val = csr(batch.astype('f'))
        result = z.eval({in1: [sparse_val]}, device=cntk_device(device_id))
        assert np.allclose(result, [expected])
Example #17
def test_sanitize_batch_contiguity():
    a1 = AA([[1, 2], [3, 4]])
    a2 = AA([[5, 6], [7, 8]])
    var = sequence.input((2, 2), is_sparse=True)

    batch = [a1.T, a2.T]
    with pytest.warns(RuntimeWarning):
        b = sanitize_batch(var, batch)
        assert b.shape == (2, 1, 2, 2)

    batch = [a1, a2]
    b = sanitize_batch(var, batch)
    assert b.shape == (2, 1, 2, 2)
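
The RuntimeWarning in the first case is about memory layout: transposing a NumPy array yields a non-contiguous view, which has to be copied before CNTK can ingest it. A quick check of that premise:

import numpy as np

a1 = np.array([[1, 2], [3, 4]])
print(a1.flags['C_CONTIGUOUS'])    # True
print(a1.T.flags['C_CONTIGUOUS'])  # False -- this is what triggers the copy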
Example #18
def test_eval_one_hot_seq(one_hot_batch, device_id):
    dim = 10
    multiplier = 2

    for var_is_sparse in [True, False]:
        in1 = sequence.input(shape=(dim,), is_sparse=var_is_sparse)
        # Convert CNTK node value to dense so that we can compare it later
        z = times(in1, np.eye(dim)*multiplier)
        # Convert expectation to dense
        expected = [np.eye(dim)[seq]*multiplier for seq in one_hot_batch]
        batch = Value.one_hot(one_hot_batch, num_classes=dim, device=cntk_device(device_id))
        result = z.eval({in1: batch}, device=cntk_device(device_id))
        assert np.all([np.allclose(a,b) for a,b in zip(result, expected)])
Example #19
def test_cosine_distance():
    a = np.reshape(np.arange(25.0, dtype=np.float32), (5, 5))
    b = np.reshape(np.arange(0, 5, dtype=np.float32), (1, 5))

    src = sequence.input(shape=(5), sequence_axis=Axis("Seq"))
    tgt = input(shape=(5))
    tgt_br = sequence.broadcast_as(tgt, src)
    cos_seq = cosine_distance(src, tgt_br)
    assert len(cos_seq.dynamic_axes) == 2
    assert cos_seq.dynamic_axes[1].name == "Seq"
    val = cos_seq.eval({src: [a], tgt: [b]})
    expected = [[1., 0.914659, 0.878459, 0.86155, 0.851852]]
    assert np.allclose(val, expected)
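
The expected values again reduce to plain cosine similarity: broadcast_as repeats tgt along src's sequence axis, so every row of a is compared against the same vector. A NumPy cross-check:

import numpy as np

a = np.arange(25.0, dtype=np.float32).reshape(5, 5)
b = np.arange(0, 5, dtype=np.float32)
print([float(np.dot(r, b) / (np.linalg.norm(r) * np.linalg.norm(b))) for r in a])
# ~[1.0, 0.914659, 0.878459, 0.86155, 0.851852]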
Example #20
def test_eval_sparse_seq_1(batch, device_id):
    dim = 4
    multiplier = 2
    for var_is_sparse in [True, False]:
        in1 = sequence.input(shape=(dim,), is_sparse=var_is_sparse)
        z = times(in1, multiplier*np.eye(dim))
        if isinstance(batch[0], list):
            expected = [np.vstack([m.todense() * multiplier for m in seq])
                        for seq in batch]
        else:
            expected = [seq.todense() * multiplier for seq in batch]
        result = z.eval({in1: batch}, device=cntk_device(device_id))

        assert np.all([np.allclose(a,b) for a,b in zip(result, expected)]), \
                "%s != %s"%(result,expected)
Example #21
def test_op_times_sparse_grad(device_id, precision):
    dt_precision = PRECISION_TO_TYPE[precision]

    from cntk import times, times_transpose, parameter, reshape, Value, sequence
    dim = 5
    num_sequences = 2
    seq = [i for i in range(dim)]
    identity = np.identity(dim, dtype=dt_precision)
    input_data = Value.one_hot([seq]*num_sequences, dim, dtype=dt_precision)
    input_var  = sequence.input(shape=(dim), is_sparse=True, needs_gradient=False, dtype=dt_precision)
    e = parameter(shape = (dim, dim), init = identity, dtype=dt_precision)
    z = reshape(times_transpose(e, times(input_var, e)), dim)
    e_grad = z.grad({input_var : input_data}, [e])
    
    assert np.allclose(e_grad, np.ones((dim,dim))*4)
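
Why all fours: z multiplies by e twice, so each entry of e collects two gradient contributions per sequence, and the batch holds two sequences. A finite-difference cross-check of that reasoning against a NumPy model of the graph (assumption mine):

import numpy as np

dim, num_seq, eps = 5, 2, 1e-5
x = np.tile(np.eye(dim), (num_seq, 1))  # every one-hot step of both sequences

def total(e):
    # summed graph output: sum over steps of e @ (x_t @ e)
    return sum((e @ (e.T @ xt)).sum() for xt in x)

e0, g = np.eye(dim), np.zeros((dim, dim))
for p in range(dim):
    for q in range(dim):
        d = np.zeros((dim, dim))
        d[p, q] = eps
        g[p, q] = (total(e0 + d) - total(e0 - d)) / (2 * eps)
print(np.allclose(g, 4))  # True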
Example #22
def train_sequence_classifier(debug_output=False):
    input_dim = 2000
    cell_dim = 25
    hidden_dim = 25
    embedding_dim = 50
    num_output_classes = 5

    # Input variables denoting the features and label data
    features = sequence.input(shape=input_dim, is_sparse=True)
    label = input(num_output_classes)

    # Instantiate the sequence classification model
    classifier_output = LSTM_sequence_classifer_net(features,
                                                    num_output_classes,
                                                    embedding_dim, hidden_dim,
                                                    cell_dim)

    ce = cross_entropy_with_softmax(classifier_output, label)
    pe = classification_error(classifier_output, label)

    rel_path = ("../../../Tests/EndToEndTests/Text/" +
                "SequenceClassification/Data/Train.ctf")
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path)

    reader = create_reader(path, True, input_dim, num_output_classes)

    input_map = {
        features: reader.streams.features,
        label: reader.streams.labels
    }

    lr_per_sample = learning_rate_schedule(0.0005, UnitType.sample)
    # Instantiate the trainer object to drive the model training
    progress_printer = ProgressPrinter(0)
    trainer = Trainer(classifier_output, (ce, pe),
                      sgd(classifier_output.parameters, lr=lr_per_sample),
                      progress_printer)

    # Get minibatches of sequences to train with and perform model training
    minibatch_size = 200

    for i in range(255):
        mb = reader.next_minibatch(minibatch_size, input_map=input_map)
        trainer.train_minibatch(mb)

    evaluation_average = float(trainer.previous_minibatch_evaluation_average)
    loss_average = float(trainer.previous_minibatch_loss_average)
    return evaluation_average, loss_average
Example #23
def create_network(input_vocab_dim, label_vocab_dim):
    # network complexity; initially low for faster testing
    hidden_dim = 256
    num_layers = 1

    # Source and target inputs to the model
    input_seq_axis = Axis('inputAxis')
    label_seq_axis = Axis('labelAxis')
    raw_input = sequence.input(shape=(input_vocab_dim), sequence_axis=input_seq_axis, name='raw_input')
    raw_labels = sequence.input(shape=(label_vocab_dim), sequence_axis=label_seq_axis, name='raw_labels')

    # Instantiate the sequence to sequence translation model
    input_sequence = raw_input

    # Drop the sentence start token from the label, for decoder training
    label_sequence = sequence.slice(raw_labels, 1, 0) # <s> A B C </s> --> A B C </s>
    label_sentence_start = sequence.first(raw_labels)        # <s>

    is_first_label = sequence.is_first(label_sequence)       # <s> 0 0 0 ...
    label_sentence_start_scattered = sequence.scatter(
        label_sentence_start, is_first_label)

    # Encoder
    encoder_outputH = stabilize(input_sequence)
    for i in range(0, num_layers):
        (encoder_outputH, encoder_outputC) = LSTMP_component_with_self_stabilization(
            encoder_outputH.output, hidden_dim, hidden_dim, future_value, future_value)

    thought_vectorH = sequence.first(encoder_outputH)
    thought_vectorC = sequence.first(encoder_outputC)

    thought_vector_broadcastH = sequence.broadcast_as(
        thought_vectorH, label_sequence)
    thought_vector_broadcastC = sequence.broadcast_as(
        thought_vectorC, label_sequence)

    # Decoder
    decoder_history_hook = alias(label_sequence, name='decoder_history_hook') # copy label_sequence

    decoder_input = element_select(is_first_label, label_sentence_start_scattered, past_value(
        decoder_history_hook))

    decoder_outputH = stabilize(decoder_input)
    for i in range(0, num_layers):
        if i > 0:
            recurrence_hookH = past_value
            recurrence_hookC = past_value
        else:
            isFirst = sequence.is_first(label_sequence)
            recurrence_hookH = lambda operand: element_select(
                isFirst, thought_vector_broadcastH, past_value(operand))
            recurrence_hookC = lambda operand: element_select(
                isFirst, thought_vector_broadcastC, past_value(operand))

        (decoder_outputH, decoder_outputC) = LSTMP_component_with_self_stabilization(
            decoder_outputH.output, hidden_dim, hidden_dim, recurrence_hookH, recurrence_hookC)

    decoder_output = decoder_outputH

    # Softmax output layer
    z = linear_layer(stabilize(decoder_output), label_vocab_dim)

    # Criterion nodes
    ce = cross_entropy_with_softmax(z, label_sequence)
    errs = classification_error(z, label_sequence)

    # network output for decoder history
    net_output = hardmax(z)

    # make a clone of the graph where the ground truth is replaced by the network output
    ng = z.clone(CloneMethod.share, {decoder_history_hook.output : net_output.output})

    return {
        'raw_input' : raw_input,
        'raw_labels' : raw_labels,
        'ce' : ce,
        'pe' : errs,
        'ng' : ng,
        'output': z
    }
Example #24
def test_sweep_based_schedule(tmpdir, device_id):
    from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs
    from cntk import cross_entropy_with_softmax, classification_error, plus, reduce_sum, sequence
    from cntk import Trainer

    input_dim = 69

    ctf_data = '''\
0   |S0 3:1   |S1 3:1 |# <s>
0   |S0 4:1 |# A    |S1 32:1 |# ~AH
0   |S0 5:1 |# B    |S1 36:1 |# ~B
0   |S0 4:1 |# A    |S1 31:1 |# ~AE
0   |S0 7:1 |# D    |S1 38:1 |# ~D
0   |S0 12:1 |# I   |S1 47:1 |# ~IY
0   |S0 1:1 |# </s> |S1 1:1 |# </s>
2   |S0 60:1 |# <s> |S1 3:1 |# <s>
2   |S0 61:1 |# A   |S1 32:1 |# ~AH
'''
    ctf_file = str(tmpdir / '2seqtest.txt')
    with open(ctf_file, 'w') as f:
        f.write(ctf_data)

    mbs = MinibatchSource(CTFDeserializer(
        ctf_file,
        StreamDefs(features=StreamDef(field='S0',
                                      shape=input_dim,
                                      is_sparse=True),
                   labels=StreamDef(field='S1',
                                    shape=input_dim,
                                    is_sparse=True))),
                          randomize=False)

    in1 = sequence.input(shape=(input_dim, ))
    labels = sequence.input(shape=(input_dim, ))
    p = parameter(shape=(input_dim, ), init=10)
    z = plus(in1, reduce_sum(p), name='z')
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)

    lr_per_sample = learning_rate_schedule([0.3, 0.2, 0.1, 0.0],
                                           UnitType.sample)
    learner = sgd(z.parameters, lr_per_sample)
    trainer = Trainer(z, (ce, errs), [learner])

    input_map = {in1: mbs.streams.features, labels: mbs.streams.labels}

    # fetch minibatch (first sequence)
    data = mbs.next_minibatch(1, input_map=input_map)
    trainer.train_minibatch(data)
    assert learner.learning_rate() == 0.3

    # fetch minibatch (second sequence, sweep ends at this point)
    data = mbs.next_minibatch(1, input_map=input_map)
    trainer.train_minibatch(data)
    assert learner.learning_rate() == 0.2

    # fetch minibatch (both sequences -- entire sweep in one go)
    data = mbs.next_minibatch(9, input_map=input_map)
    trainer.train_minibatch(data)
    assert learner.learning_rate() == 0.1

    # fetch minibatch (multiple sweeps)
    data = mbs.next_minibatch(30, input_map=input_map)
    trainer.train_minibatch(data, outputs=[z.output])
    assert learner.learning_rate() == 0.0
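
The asserts trace the sweep-based semantics of the schedule: a list-valued learning_rate_schedule with no explicit epoch_size advances one entry per sweep of the data (here a 9-sample sweep: one 7-sample and one 2-sample sequence):

# sweep 1 -> 0.3, sweep 2 -> 0.2, sweep 3 -> 0.1, sweep 4 onwards -> 0.0
lr_per_sweep = learning_rate_schedule([0.3, 0.2, 0.1, 0.0], UnitType.sample)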
Example #25
def test_distributed_mb_source(tmpdir):
    input_dim = 69

    ctf_data = '''\
0	|S0 3:1 |# <s>	|S1 3:1 |# <s>
0	|S0 4:1 |# A	|S1 32:1 |# ~AH
0	|S0 5:1 |# B	|S1 36:1 |# ~B
0	|S0 4:1 |# A	|S1 31:1 |# ~AE
0	|S0 7:1 |# D	|S1 38:1 |# ~D
0	|S0 12:1 |# I	|S1 47:1 |# ~IY
0	|S0 1:1 |# </s>	|S1 1:1 |# </s>
2	|S0 60:1 |# <s>	|S1 3:1 |# <s>
2	|S0 61:1 |# A	|S1 32:1 |# ~AH
2	|S0 61:1 |# A	|S1 32:1 |# ~AH
3	|S0 60:1 |# <s>	|S1 3:1 |# <s>
3	|S0 61:1 |# A	|S1 32:1 |# ~AH
3	|S0 61:1 |# A	|S1 32:1 |# ~AH
3	|S0 61:1 |# A	|S1 32:1 |# ~AH
4	|S0 60:1 |# <s>	|S1 3:1 |# <s>
5	|S0 60:1 |# <s>	|S1 3:1 |# <s>
5	|S0 61:1 |# A	|S1 32:1 |# ~AH
6	|S0 60:1 |# <s>	|S1 3:1 |# <s>
6	|S0 61:1 |# A	|S1 32:1 |# ~AH
7	|S0 60:1 |# <s>	|S1 3:1 |# <s>
8	|S0 60:1 |# <s>	|S1 3:1 |# <s>
8	|S0 61:1 |# A	|S1 32:1 |# ~AH
9	|S0 60:1 |# <s>	|S1 3:1 |# <s>
9	|S0 61:1 |# A	|S1 32:1 |# ~AH
10	|S0 61:1 |# A	|S1 32:1 |# ~AH
'''
    from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs, FULL_DATA_SWEEP

    ctf_file = str(tmpdir / '2seqtest.txt')
    with open(ctf_file, 'w') as f:
        f.write(ctf_data)

    # No randomization

    mb0 = MinibatchSource(CTFDeserializer(
        ctf_file,
        StreamDefs(features=StreamDef(field='S0',
                                      shape=input_dim,
                                      is_sparse=True),
                   labels=StreamDef(field='S1',
                                    shape=input_dim,
                                    is_sparse=True))),
                          randomize=False,
                          epoch_size=36)  # A bit more than a sweep
    mb1 = MinibatchSource(CTFDeserializer(
        ctf_file,
        StreamDefs(features=StreamDef(field='S0',
                                      shape=input_dim,
                                      is_sparse=True),
                   labels=StreamDef(field='S1',
                                    shape=input_dim,
                                    is_sparse=True))),
                          randomize=False,
                          epoch_size=36)  # A bit more than a sweep
    input = sequence.input(shape=(input_dim, ))
    label = sequence.input(shape=(input_dim, ))
    input_map = {input: mb0.streams.features, label: mb0.streams.labels}

    # Because we are emulating two workers here, minibatch_size_in_samples is
    # split in two, so below we expect 5 samples per worker.
    data = mb0.next_minibatch(minibatch_size_in_samples=10,
                              input_map=input_map,
                              num_data_partitions=2,
                              partition_index=0)
    assert (data[input].num_samples == 7)  # Sequence 0

    data = mb0.next_minibatch(minibatch_size_in_samples=10,
                              input_map=input_map,
                              num_data_partitions=2,
                              partition_index=0)
    assert (data[input].num_samples == 4)  # Sequence 3

    data = mb0.next_minibatch(minibatch_size_in_samples=10,
                              input_map=input_map,
                              num_data_partitions=2,
                              partition_index=0)
    assert (data[input].num_samples == 5)  # Sequences 5, 7, 9

    data = mb0.next_minibatch(minibatch_size_in_samples=10,
                              input_map=input_map,
                              num_data_partitions=2,
                              partition_index=0)
    assert (data[input].num_samples == 7)  # Sequence 0

    data = mb0.next_minibatch(minibatch_size_in_samples=10,
                              input_map=input_map,
                              num_data_partitions=2,
                              partition_index=0)
    assert (data[input].num_samples == 4)  # Sequence 3

    data = mb0.next_minibatch(minibatch_size_in_samples=10,
                              input_map=input_map,
                              num_data_partitions=2,
                              partition_index=0)
    assert (len(data) == 0)  # No data

    data = mb1.next_minibatch(minibatch_size_in_samples=10,
                              input_map=input_map,
                              num_data_partitions=2,
                              partition_index=1)
    assert (data[input].num_samples == 4)  # Sequences 2, 4

    data = mb1.next_minibatch(minibatch_size_in_samples=10,
                              input_map=input_map,
                              num_data_partitions=2,
                              partition_index=1)
    assert (data[input].num_samples == 5)  # Sequences 6, 8, 10

    data = mb1.next_minibatch(minibatch_size_in_samples=10,
                              input_map=input_map,
                              num_data_partitions=2,
                              partition_index=1)
    assert (data[input].num_samples == 3)  # Sequence 2

    data = mb1.next_minibatch(minibatch_size_in_samples=10,
                              input_map=input_map,
                              num_data_partitions=2,
                              partition_index=1)
    assert (len(data) == 0)  # No data

    # Randomization

    mb3 = MinibatchSource(CTFDeserializer(
        ctf_file,
        StreamDefs(features=StreamDef(field='S0',
                                      shape=input_dim,
                                      is_sparse=True),
                   labels=StreamDef(field='S1',
                                    shape=input_dim,
                                    is_sparse=True))),
                          randomize=True,
                          epoch_size=FULL_DATA_SWEEP)

    mb4 = MinibatchSource(CTFDeserializer(
        ctf_file,
        StreamDefs(features=StreamDef(field='S0',
                                      shape=input_dim,
                                      is_sparse=True),
                   labels=StreamDef(field='S1',
                                    shape=input_dim,
                                    is_sparse=True))),
                          randomize=True,
                          epoch_size=FULL_DATA_SWEEP)

    data = mb3.next_minibatch(minibatch_size_in_samples=10,
                              input_map=input_map,
                              num_data_partitions=2,
                              partition_index=0)
    assert (data[input].num_samples == 5)

    data = mb3.next_minibatch(minibatch_size_in_samples=10,
                              input_map=input_map,
                              num_data_partitions=2,
                              partition_index=0)
    assert (data[input].num_samples == 4)

    data = mb3.next_minibatch(minibatch_size_in_samples=10,
                              input_map=input_map,
                              num_data_partitions=2,
                              partition_index=0)
    assert (data[input].num_samples == 4)

    data = mb3.next_minibatch(minibatch_size_in_samples=10,
                              input_map=input_map,
                              num_data_partitions=2,
                              partition_index=0)
    assert (data[input].num_samples == 5)

    data = mb3.next_minibatch(minibatch_size_in_samples=10,
                              input_map=input_map,
                              num_data_partitions=2,
                              partition_index=0)
    assert (data[input].num_samples == 7)

    data = mb4.next_minibatch(minibatch_size_in_samples=10,
                              input_map=input_map,
                              num_data_partitions=2,
                              partition_index=1)
    assert (len(data) == 0)  # Due to chunking we do not expect any data for rank 1
Example #26
def test_one_hot_skip():
    a = Value.one_hot([[0, 1, Value.ONE_HOT_SKIP]], 3)
    i = sequence.input(shape=(3, ))
    b = i * 1
    expected = [[[1., 0., 0.], [0., 1., 0.], [0., 0., 0.]]]
    assert np.allclose(b.eval({i: a}), expected)