Esempio n. 1
0
def test_trainer_with_some_params_not_learned():
    """SGD over W only: B must stay frozen while W changes every minibatch."""
    in_dim, out_dim = 2, 2

    feat = input_variable(shape=(in_dim,))
    W = parameter(shape=(in_dim, out_dim), init=glorot_uniform())
    B = parameter(shape=(out_dim,), init=glorot_uniform())
    z = times(feat, W) + B

    w_before = W.value
    b_before = B.value

    labels = input_variable(shape=(out_dim,))
    loss = cross_entropy_with_softmax(z, labels)
    metric = classification_error(z, labels)

    lr_per_sample = learning_rate_schedule(0.1, UnitType.sample)
    # Only W is handed to the learner, so B must never be updated.
    trainer = Trainer(z, (loss, metric), sgd([W], lr_per_sample))

    batch = {feat: [[1, 1], [2, 2]], labels: [[0, 1], [1, 0]]}

    for _ in range(3):
        trainer.train_minibatch(batch)

        # B untouched; W moved by the gradient step.
        assert np.array_equal(B.value, b_before)
        assert not np.array_equal(W.value, w_before)
        w_before = W.value

    trainer.test_minibatch(batch)
Esempio n. 2
0
def test_op_batch_normalization_spatial_shape_inference(channels, input_size, device_id, precision):
    """Spatial batch-norm must infer every per-channel parameter as (channels,)."""
    dtype = PRECISION_TO_TYPE[precision]
    dev = cntk_device(device_id)

    spatial = True
    epsilon = 0.01

    sample_shape = (channels, input_size, input_size)
    inferred = (C.InferredDimension,)

    x = C.input_variable(sample_shape, dtype=dtype)
    # Distinct init values (1..4) make the parameters distinguishable if needed.
    scale = C.parameter(inferred, init=1, dtype=dtype, device=dev)
    bias = C.parameter(inferred, init=2, dtype=dtype, device=dev)
    run_mean = C.constant(3, shape=inferred, dtype=dtype, device=dev)
    run_var = C.constant(4, shape=inferred, dtype=dtype, device=dev)
    run_count = C.constant(2, shape=(), dtype=dtype, device=dev)

    bn = C.batch_normalization(x, scale, bias, run_mean, run_var, spatial,
                               normalization_time_constant=-1,
                               epsilon=epsilon,
                               running_count=run_count)

    # Building the node resolves InferredDimension to one value per channel.
    for p in (scale, bias, run_mean, run_var):
        assert p.shape == (channels,)
def test_convert_optimized_rnnstack(num_layers, bidirectional, recurrent_op, device_id):
    """Convert cuDNN optimized_rnnstack nodes to generic CNTK ops.

    Builds a model mixing plain optimized_rnnstack calls (including one shared
    weight, W2), Dense layers, and block-wrapped rnnstacks, then checks that
    C.misc.convert_optimized_rnnstack yields identical outputs while leaving
    the original cuDNN model untouched. GPU only (optimized_rnnstack needs cuDNN).
    """
    if device_id == -1:
        pytest.skip('only runs on GPU')

    input_dim = 5
    hidden_dim = 3
    # Three variable-length float32 sequences (lengths 20, 10, 40).
    data = [np.random.random((20,input_dim)).astype(np.float32), np.random.random((10,input_dim)).astype(np.float32), np.random.random((40,input_dim)).astype(np.float32)]
    input_var = C.sequence.input_variable(shape=(input_dim,))

    # shape (-1, 1): CNTK infers the packed cuDNN weight-blob size.
    W1 = C.parameter((-1,1), init = C.glorot_uniform())
    W2 = C.parameter((-1,1), init = C.glorot_uniform())
    cudnn_rnn1 = C.optimized_rnnstack(input_var, W1, hidden_dim, num_layers=num_layers, bidirectional=bidirectional, recurrent_op=recurrent_op)
    dense1 = C.layers.Dense(hidden_dim)(cudnn_rnn1)
    cudnn_rnn2 = C.optimized_rnnstack(dense1, W2, hidden_dim, num_layers=num_layers, bidirectional=bidirectional, recurrent_op=recurrent_op)
    dense2 = C.layers.Dense(hidden_dim)(cudnn_rnn2)
    cudnn_rnn3 = C.optimized_rnnstack(dense2, W2, hidden_dim, num_layers=num_layers, bidirectional=bidirectional, recurrent_op=recurrent_op) # test shared parameter W2

    def blocked(d):
        # Wrap an rnnstack inside a BlockFunction to exercise conversion of blocks.
        blocked_W = C.parameter((-1,d), init = C.glorot_uniform())
        @C.layers.BlockFunction('', '')
        def func(x):
            return C.optimized_rnnstack(x, blocked_W, d, 1, recurrent_op='lstm')
        return func

    cudnn_model = C.layers.Sequential([blocked(hidden_dim), blocked(2*hidden_dim), blocked(3*hidden_dim)])(cudnn_rnn3)
    cudnn_out = cudnn_model.eval({input_var:data})

    model = C.misc.convert_optimized_rnnstack(cudnn_model)

    # make sure original cudnn model is intact
    cudnn_out2 = cudnn_model.eval({input_var:data})
    assert all(np.allclose(cudnn_out[i], cudnn_out2[i]) for i in range(len(cudnn_out)))

    # converted model must reproduce the cuDNN outputs on the same data
    model_out = model.eval({model.arguments[0]:data})
    assert all(np.allclose(cudnn_out[i], model_out[i]) for i in range(len(cudnn_out)))
Esempio n. 4
0
def seqcla():
    """Sequence classification: LSTM over embedded sparse word inputs, softmax output.

    Uses the legacy CNTK v1 Python API (dynamic_axis / CNTKTextFormatReader /
    LocalExecutionContext). Relies on module-level names defined elsewhere:
    ``train_file``, ``embedding_file``, ``lstm_layer`` and ``calc_accuracy`` —
    NOTE(review): confirm they exist in the enclosing module.
    """

    # LSTM params
    input_dim = 50
    output_dim = 128
    cell_dim = 128

    # model
    num_labels = 5
    vocab = 2000
    embed_dim = 50

    # features are sparse one-hot words over the shared dynamic (time) axis 't'
    t = C.dynamic_axis(name='t')
    features = C.sparse_input(vocab, dynamic_axis=t, name='features')
    labels = C.input(num_labels, name='labels')

    train_reader = C.CNTKTextFormatReader(train_file)

    # setup embedding matrix — frozen (learning_rate_multiplier=0.0) and loaded from file
    embedding = C.parameter((embed_dim, vocab), learning_rate_multiplier=0.0,
                             init_from_file_path=embedding_file)

    # get the vector representing the word
    sequence = C.times(embedding, features, name='sequence')

    # add an LSTM layer
    L = lstm_layer(output_dim, cell_dim, sequence, input_dim)

    # add a softmax layer on top
    w = C.parameter((num_labels, output_dim), name='w')
    b = C.parameter((num_labels), name='b')
    z = C.times(w, L) + b
    z.name='z'
    z.tag = "output"

    # and reconcile the shared dynamic axis
    pred = C.reconcile_dynamic_axis(z, labels, name='pred')

    ce = C.cross_entropy_with_softmax(labels, pred)
    ce.tag = "criterion"

    my_sgd = C.SGDParams(epoch_size=0, minibatch_size=10, learning_rates_per_mb=0.1, max_epochs=3)

    with C.LocalExecutionContext('seqcla') as ctx:
        # train the model
        ctx.train(root_nodes=[ce], training_params=my_sgd, input_map=train_reader.map(
                  features, alias='x', dim=vocab, format='Sparse').map(
                  labels, alias='y', dim=num_labels, format='Dense'))

        # write out the predictions
        ctx.write(input_map=train_reader.map(
                  features, alias='x', dim=vocab, format='Sparse').map(
                  labels, alias='y', dim=num_labels, format='Dense'))

        # do some manual accuracy testing
        acc = calc_accuracy(train_file, ctx.output_filename_base)

        # and test for the same number...
        TOLERANCE_ABSOLUTE = 1E-02
        assert np.allclose(acc, 0.6006415396952687, atol=TOLERANCE_ABSOLUTE)
Esempio n. 5
0
def test_cntk_basic():
    """Load crosstalk parameter baselines and round-trip them through assign().

    Uses the TensorFlow baseline when TF is importable, otherwise falls back
    to the CNTK baseline. Relies on module-level globals defined elsewhere:
    workdir, shape1/shape2, param1/param2.
    """
    try:
        import tensorflow
        has_tensorflow = True
    except ImportError:
        # Only a missing TF should trigger the CNTK fallback; the original
        # bare `except:` also swallowed unrelated failures (KeyboardInterrupt,
        # broken TF installs) and hid real errors.
        has_tensorflow = False

    if has_tensorflow:
        tf_baseline_basic()
    else:
        cntk_baseline_basic()

    import cntk as C
    import cntk.contrib.crosstalk.crosstalk_cntk as crct
    ci = crct.instance
    ci.set_workdir(workdir)

    p1 = C.parameter(shape1)
    p2 = C.parameter(shape2)
    ci.watch(p1, 'p1')
    ci.watch({'param1':p1, 'param2':p2}, 'p1_p2', var_type=crct.DictParameterType)

    # load the saved baselines back into the parameters and compare
    ci.assign('p1', load=True)
    assert np.isclose(p1.value, param1).all()

    ci.assign('p1_p2', load=True)
    assert np.isclose(p1.value, param1).all() and np.isclose(p2.value, param2).all()

    # test assign with value
    ci.assign('p1', value=param1)
    ci.assign('p1_p2', value={'param1':param1, 'param2':param2})

    ci.reset()
Esempio n. 6
0
def test_get_data_type():
    """get_data_type resolves mixed variables/arrays/scalars to a single numpy dtype."""
    pa32 = C.parameter(init=np.asarray(2, dtype=np.float32))
    pa64 = C.parameter(init=np.asarray(2, dtype=np.float64))
    pl = C.placeholder(shape=(2))
    c = C.constant(value=3.0)
    n32 = AA(1, dtype=np.float32)
    n64 = AA(1, dtype=np.float64)

    cases = [
        ((pa32,), np.float32),
        ((pa32, n32), np.float32),
        ((n32, n32), np.float32),
        ((n32, n64), np.float64),
        ((pl, n64), np.float64),
        ((pl, n32), np.float32),
        ((pl, pl), None),
        # variable's type shall take precedence over provided data
        ((pa32, n64), np.float32),
        ((pa64, n64), np.float64),
        ((pa32, pl, n64), np.float32),
        ((pa64, pl, n64), np.float64),
        ((np.float64(1),), np.float64),
        ((np.float32(1),), np.float32),
        ((np.int64(1),), np.float32),  # special case for cntk
        ((1,), np.float32),
        ((1.0,), np.float32),
    ]
    for args, expected in cases:
        if expected is None:
            assert get_data_type(*args) is None
        else:
            assert get_data_type(*args) == expected
Esempio n. 7
0
def test_noise_injection_with_checkpointing():
    """Checkpointing must capture the noise-injection RNG state of a learner.

    Three identically-seeded parameters get identical updates; learner3 is
    restored from learner1's checkpoint every step, so w3 must track w1
    exactly, while w2 diverges (its noise draws differ from learner1's —
    the asserts below rely on this).
    """
    from cntk import initializer
    shape = (100,100)

    # identical seeds -> identical initial values for all three parameters
    w1 = parameter(shape=shape, init=initializer.glorot_uniform(seed=123))
    w2 = parameter(shape=shape, init=initializer.glorot_uniform(seed=123))
    w3 = parameter(shape=shape, init=initializer.glorot_uniform(seed=123))

    lr=learning_rate_schedule(0.5, UnitType.sample)
    m=C.momentum_schedule(0.99)

    learner1 = C.momentum_sgd([w1], lr, m, gaussian_noise_injection_std_dev=0.5)
    learner2 = C.momentum_sgd([w2], lr, m, gaussian_noise_injection_std_dev=0.5)
    learner3 = C.momentum_sgd([w3], lr, m, gaussian_noise_injection_std_dev=0.5)

    assert np.allclose(w1.value, w2.value) and np.allclose(w1.value, w3.value)

    for i in range(10):
        checkpoint = learner1.create_checkpoint()

        # same "gradient" fed to every learner
        v =  np.float32(np.random.rand(100,100))

        learner1.update({w1: v}, 1)
        learner2.update({w2: v}, 1)
        # different noise draws -> different results
        assert not np.allclose(w1.value, w2.value)

        # restoring learner1's checkpoint aligns learner3's noise state with learner1's
        learner3.restore_from_checkpoint(checkpoint)
        learner3.update({w3: v}, 1)
        assert np.allclose(w1.value, w3.value)
Esempio n. 8
0
def _graph_dict():
    """Build a throwaway graph (no semantic meaning) used only for traversal tests."""
    nodes = {}

    nodes['i1'] = C.sequence.input_variable(shape=(2, 3), sequence_axis=Axis('ia'), name='i1')
    nodes['c1'] = C.constant(shape=(2, 3), value=6, name='c1')
    nodes['p1'] = C.parameter(shape=(3, 2), init=7, name='p1')

    nodes['op1'] = C.plus(nodes['i1'], nodes['c1'], name='op1')
    nodes['op2'] = C.times(nodes['op1'], nodes['p1'], name='op2')

    #d['slice'] = slice(d['c1'], Axis.default_dynamic_axis(), 0, 3)
    #label_sentence_start = sequence.first(raw_labels)

    # parameter deliberately left without a name
    nodes['p2'] = C.parameter(shape=(2, 2))

    # two ops deliberately share the name 'op3' (duplicate-name case)
    nodes['op3a'] = C.plus(nodes['op2'], nodes['p2'], name='op3')
    nodes['op3b'] = C.plus(nodes['op3a'], nodes['p2'], name='op3')

    nodes['first'] = C.sequence.first(nodes['op3b'], name='past')
    nodes['root'] = nodes['first']

    return nodes
Esempio n. 9
0
def BinaryConvolution(operand,
                      filter_shape,
                      num_filters=1,
                      channels = 1,
                      init=C.glorot_uniform(),
                      pad=False,
                      strides=1,
                      bias=True,
                      init_bias=0,
                      op_name='BinaryConvolution', name=''):
    """Binarized convolution followed by a learnable PReLU activation.

    Args:
        operand: tensor to convolve
        filter_shape: tuple indicating filter size
        num_filters: number of filters to use
        channels: number of incoming channels
        init: initializer for the convolution and PReLU weights
        pad: whether to pad the two spatial dimensions
        strides: convolution stride
        bias: if True (default), add a learnable per-filter bias
        init_bias: initializer for the bias parameter
        op_name, name: kept for interface compatibility (not used internally)
    Returns:
        the output tensor of the binary convolution block
    """
    kernel_shape = (num_filters, channels) + filter_shape
    W = C.parameter(shape=kernel_shape, init=init, name="filter")

    # Binarize weights and inputs (CustomMultibit with 1 bit — defined
    # elsewhere), wrapping the convolution in a block bound to `operand`.
    binary_convolve_operand_p = C.placeholder(operand.shape, operand.dynamic_axes, name="operand")
    binary_convolve = C.convolution(CustomMultibit(W, 1), CustomMultibit(binary_convolve_operand_p, 1), auto_padding=[False, pad, pad], strides=[strides])
    r = C.as_block(binary_convolve, [(binary_convolve_operand_p, operand)], 'binary_convolve')

    # BUG FIX: the bias was previously added unconditionally; the `bias`
    # argument was accepted but ignored. Honor it (default True preserves
    # the old behavior for existing callers).
    if bias:
        bias_shape = (num_filters, 1, 1)
        b = C.parameter(shape=bias_shape, init=init_bias, name="bias")
        r = r + b

    # apply learnable param relu
    P = C.parameter(shape=r.shape, init=init, name="prelu")
    r = C.param_relu(P, r)
    return r
Esempio n. 10
0
def test_nce_loss(classes, xdim, batch, expected_value, device_id, precision):
    """nce_loss: gradients must be sparse; mean sampled loss must match baseline."""
    dt = PRECISION_TO_TYPE[precision]

    from cntk.losses import nce_loss
    import scipy

    x = C.input_variable(xdim, needs_gradient=True)
    y = C.input_variable(classes, is_sparse=True)

    # deterministic dense inputs scaled into [0, 1)
    x0 = np.arange(batch * xdim, dtype=dt).reshape((batch, xdim))/(batch * xdim)
    # one-hot CSR labels: row i has its single 1 at column 10*(i+1)
    data = np.ones(batch, dtype=dt)
    indices = list(range(10,10*batch+1,10))
    indptr = list(range(batch+1))
    y0 = scipy.sparse.csr_matrix((data, indices, indptr), shape=(batch, classes))

    # noise-sampling weights: proportional to class index + 1
    q = np.arange(classes, dtype=dt) + 1

    b = C.parameter((classes, 1), init=-np.log(classes))
    W = C.parameter((classes, C.InferredDimension), init=C.glorot_uniform(seed=98052))

    # fixed seed so the noise sampling is reproducible
    loss = C.nce_loss(W, b, x, y, q, seed=98052)
    v = loss.grad({x:x0, y:y0}, wrt=loss.parameters, as_numpy=False)
    for key in v:
        assert v[key].is_sparse, "gradient of nce_loss with respect to %s is not sparse"%key
    # average over 100 evaluations to smooth out the noise sampling
    losses = np.zeros((100,batch))
    for i in range(100):
        losses[i,:] = loss.eval({x:x0, y:y0})
    assert np.allclose(np.mean(losses, axis=0), AA(expected_value))
def linear_layer(input_var, output_dim):
    """Affine layer: input_var @ W + b with freshly created learnable W and b."""
    in_dim = input_var.shape[0]
    weights = C.parameter(shape=(in_dim, output_dim))
    bias = C.parameter(shape=(output_dim))
    return bias + C.times(input_var, weights)
Esempio n. 12
0
def test_eval_again_with_prev_outputs_live(device_id):
    """Lifetime of eval()/grad() results across subsequent calls.

    With as_numpy=True (the default) results are copies and stay valid after
    later calls; with as_numpy=False they wrap internal temporaries and are
    invalidated (RuntimeError on access) by the next eval/grad call.
    """
    x = C.input_variable(2)
    dev = cntk_device(device_id)
    w1 = C.parameter(init=np.asarray([1], dtype=np.float32), device=dev)
    w2 = C.parameter(init=np.asarray([-1], dtype=np.float32), device=dev)
    out1 = x + w1
    out2 = x + w2
    op = C.combine([out1, out2])

    # as_numpy results are copies
    result1 = op.eval({x : np.asarray([2, 5], dtype=np.float32)}, device=dev)
    assert np.array_equal(result1[out1.output], [[3, 6]])
    assert np.array_equal(result1[out2.output], [[1, 4]])

    result2 = op.eval({x : np.asarray([[-1, 4], [-4, 7]], dtype=np.float32)}, device=dev)
    assert np.array_equal(result2[out1.output], [[0, 5], [-3, 8]])
    assert np.array_equal(result2[out2.output], [[-2, 3], [-5, 6]])

    # result1 should still be valid
    assert np.array_equal(result1[out1.output], [[3, 6]])
    assert np.array_equal(result1[out2.output], [[1, 4]])

    # as_numpy=False results are views over internal buffers
    result1 = op.eval({x : np.asarray([2, 5], dtype=np.float32)}, device=dev, as_numpy=False)
    assert np.array_equal(result1[out1.output].asarray(), [[3, 6]])
    assert np.array_equal(result1[out2.output].asarray(), [[1, 4]])

    result2 = op.eval({x : np.asarray([[-1, 4], [-4, 7]], dtype=np.float32)}, device=dev, as_numpy=False)
    assert np.array_equal(result2[out1.output].asarray(), [[0, 5], [-3, 8]])
    assert np.array_equal(result2[out2.output].asarray(), [[-2, 3], [-5, 6]])

    # Accessing result1 now will cause an error since it was a temporary that
    # is now erased, due to the subsequent eval call
    with pytest.raises(RuntimeError):
        assert np.array_equal(result1[out1.output].asarray(), [[3, 6]])

    # same checks for grad(): as_numpy grads are copies...
    grad_op = out1 + out2
    grad1 = grad_op.grad({x : np.asarray([2, 5], dtype=np.float32)}, wrt=[w1, w2], device=dev)
    assert np.array_equal(grad1[w1], [2])
    assert np.array_equal(grad1[w2], [2])

    grad2 = grad_op.grad({x : np.asarray([[-1, 4], [-4, 7]], dtype=np.float32)}, wrt=[w1, w2], device=dev)
    assert np.array_equal(grad2[w1], [4])
    assert np.array_equal(grad2[w2], [4])

    # grad1 should still be valid
    assert np.array_equal(grad1[w1], [2])
    assert np.array_equal(grad1[w2], [2])

    # ...while as_numpy=False grads are invalidated by the next grad call
    grad1 = grad_op.grad({x : np.asarray([2, 5], dtype=np.float32)}, wrt=[w1, w2], device=dev, as_numpy=False)
    assert np.array_equal(grad1[w1].asarray(), [2])
    assert np.array_equal(grad1[w2].asarray(), [2])

    grad2 = grad_op.grad({x : np.asarray([[-1, 4], [-4, 7]], dtype=np.float32)}, wrt=[w1, w2], device=dev, as_numpy=False)
    assert np.array_equal(grad2[w1].asarray(), [4])
    assert np.array_equal(grad2[w2].asarray(), [4])

    # Accessing grad1 now will cause an error since it was a temporary that
    # is now erased, due to the subsequent grad call
    with pytest.raises(RuntimeError):
        assert np.array_equal(grad1[w1].asarray(), [2])
Esempio n. 13
0
def linear_layer(input_var, output_dim):
    """Affine projection input_var @ W + b.

    Side effect: stores the created parameters in the module-level
    ``param_dict`` under keys 'w' and 'b'. NOTE(review): ``param_dict`` is
    defined elsewhere — confirm it exists before this is called.
    """
    input_dim = input_var.shape[0]
    weight_param = C.parameter(shape=(input_dim, output_dim))
    bias_param = C.parameter(shape=(output_dim))

    param_dict['w'], param_dict['b'] = weight_param, bias_param

    return C.times(input_var, weight_param) + bias_param
Esempio n. 14
0
def test_gather_op(device_id, precision):
    """gather: forward values, parameter gradients, indices-as-expression,
    and a small training loop exercising the gather node's memory reuse."""
    a_data = [AA([[0],[1]], dtype=PRECISION_TO_TYPE[precision]),
              AA([[3],[4]], dtype=PRECISION_TO_TYPE[precision])]
    a = C.input_variable((2,1))
    r_data = np.arange(12).reshape(6,2).astype('f')
    # BUG FIX: shape must be the array's shape tuple; the original passed
    # r_data.data (a memoryview buffer), which is not a valid shape.
    r = C.parameter(shape=r_data.shape, init=r_data)
    res = C.gather(r, a).eval({a:a_data})
    expected = np.asarray([[[[0., 1.]],[[2., 3.]]],[[[6., 7.]],[[8.,9.]]]])
    assert np.array_equal(res, expected)

    # gradient w.r.t. the table: 1 for each gathered row, 0 elsewhere
    grads = C.gather(r, a).grad({a:a_data}, [r])
    expected_grad = np.asarray([[1,1],[1,1],[0,0],[1,1],[1,1],[0,0]], dtype=np.float32)
    assert np.array_equal(grads, expected_grad)

    # gather with indices from a learnable parameter (no gradients should pass
    # through the indices -- 0s should be passed)
    indices_params = C.parameter(shape=(1,), init=1.0)
    grads = C.gather(r, (indices_params *a)).grad({a:a_data}, [r, indices_params])
    assert np.array_equal(grads[r], expected_grad)
    assert np.array_equal(grads[indices_params], np.asarray([0.0], dtype=np.float32))

    # rank-2 indices gather a matrix of rows
    b_data = [AA([[0,2],[1,3]], dtype=PRECISION_TO_TYPE[precision]),
              AA([[2,4],[3,5]], dtype=PRECISION_TO_TYPE[precision])]
    b = C.input_variable((2,2))
    res2 = C.gather(r, b).eval({b:b_data})

    expected2 = np.asarray([[[[0., 1.],[4.,5.]],[[2., 3.],[6., 7.]]],[[[4., 5.],[8.,9.]],[[6., 7.], [10., 11.]]]])
    assert np.array_equal(res2, expected2)

    # the following small model is to test the memory reuse issue of gather node.
    x = C.input((3, 4))
    x1 = C.to_sequence(x)
    w = C.parameter((5, 6), init=1)
    z = C.gather(w, x1)
    assert z.shape == (4, 6)
    # need the unpack node to trigger memory reuse.
    f = C.sequence.unpack(z, 0, no_mask_output=True)
    y = C.input((3, 4, 6))
    loss = C.reduce_mean(C.square(f - y), axis=-1)
    loss = C.reduce_mean(loss, axis=C.Axis.all_axes())

    g = C.constant(0, shape=w.shape)
    u = C.assign(w, g + 1)
    learner = C.cntk_py.universal_learner([w], [g], u)
    trainer = C.trainer.Trainer(loss, [loss], [learner])
    indices = np.asarray([[[1, 2, 1, 2]]])
    input_data = np.repeat(np.repeat(indices, 3, axis=1), 10, axis=0)
    label_data = np.full((10, 3, 4, 6), 2)
    trainer.train_minibatch({x: input_data, y: label_data})
    # the 2nd and 3rd rows should be updated by gradients.
    assert np.mean(w.value[1, :]) < 1
    assert np.mean(w.value[2, :]) < 1
    # the other three rows should keep as 1
    assert np.isclose(np.mean(w.value[0, :]), 1)
    assert np.isclose(np.mean(w.value[3, :]), 1)
    assert np.isclose(np.mean(w.value[4, :]), 1)
Esempio n. 15
0
def test_ext_eval_5_times():
    """Evaluate a user function (MyPlus) composed into a times node."""
    dim = 2
    init_val = 10
    p = C.parameter(shape=(dim,), init=init_val, name='p')
    plus_node = C.user_function(MyPlus(p, C.constant(3)))
    z = C.times(plus_node, C.parameter(shape=(2, 50), init=2))

    out = z.eval()
    # No batch dimension since we have no input:
    # each output element is (p + 3) summed over dim=2 with weight 2 -> (p+3)*2*2.
    assert np.allclose(out, ((init_val * np.ones_like(out)) + 3) * 2 * 2)
Esempio n. 16
0
def test_h_softmax_for_sequence():
  """hierarchical_softmax_layer_for_sequence on sequence inputs.

  Seeded glorot_normal initializers make the outputs deterministic, so z, the
  class probabilities, and the per-class output probabilities are checked
  against precomputed values.
  """
  input_dim = 2
  num_output_classes = 4
  minibatch_size = 3
  seq_size = 2
  # square-root trick: ~sqrt(N) classes with ~sqrt(N) outputs each
  n_classes = int(ceil(sqrt(num_output_classes)))
  n_outputs_per_class = n_classes

  # fixed seeds -> deterministic parameters -> deterministic expected values below
  w1 = C.parameter(shape=(input_dim, n_classes), init=C.glorot_normal(seed=2), name='w1')
  b1 = C.parameter(shape=(n_classes), init=C.glorot_normal(seed=3), name='b1')
  w2s = C.parameter(shape=(n_classes, input_dim, n_outputs_per_class), init=C.glorot_normal(seed=4), name='w2s')
  b2s = C.parameter(shape=(n_classes, n_outputs_per_class), init=C.glorot_normal(seed=5), name='b2s')

  # neural network structure for hierarchical softmax
  h_input = C.sequence.input_variable(input_dim)
  h_target_class = C.sequence.input_variable([1])
  h_target_output_in_class = C.sequence.input_variable([1])
  h_z, class_probs, all_probs = C.hierarchical_softmax_layer_for_sequence(h_input, num_output_classes, h_target_class, h_target_output_in_class, minibatch_size, w1, b1, w2s, b2s)

  # labels cycle 0..num_output_classes-1; split each into (class, output-in-class)
  a = np.reshape(np.arange(seq_size * minibatch_size * input_dim, dtype = np.float32), (seq_size, minibatch_size, input_dim))
  labels = np.reshape(np.arange(seq_size * minibatch_size, dtype = np.float32), (seq_size, minibatch_size, 1)) % num_output_classes
  target_labels = labels // n_outputs_per_class
  target_output_in_labels = labels % n_outputs_per_class
  val_z = h_z.eval({h_input: a, h_target_class: target_labels, h_target_output_in_class: target_output_in_labels})
  val_class_probs = class_probs.eval({h_input: a, h_target_class: target_labels, h_target_output_in_class: target_output_in_labels})
  val_all_probs = [x.eval({h_input: a, h_target_class: target_labels, h_target_output_in_class: target_output_in_labels}) for x in all_probs]

  expected_z = [[[ 0.16448107],
            [ 0.00597861],
            [ 0.99322051]],
            [[  8.59128195e-04],
            [  3.77086673e-09],
            [  3.42400197e-12]]]
  expected_class_probs = [[[  5.81252098e-01,   4.18747932e-01],
                          [  1.03938626e-02,   9.89606142e-01],
                          [  7.94661901e-05,   9.99920487e-01]],
                          [[  6.01340048e-07,   9.99999404e-01],
                          [  4.55011762e-09,   1.00000000e+00],
                          [  3.44291574e-11,   1.00000000e+00]]]
  expected_all_probs =  [[[[  1.64481074e-01,   4.16771024e-01],
                          [  4.41524992e-03,   5.97861316e-03],
                          [  4.61043091e-05,   3.33618809e-05]],
                          [[  4.33648694e-07,   1.67691354e-07],
                          [  3.77086673e-09,   7.79251219e-10],
                          [  3.10051568e-11,   3.42400197e-12]]],
                          [[[ 0.29590073,  0.12284722],
                          [ 0.93986785,  0.04973821],
                          [ 0.99322051,  0.00669997]],
                          [[  9.99140263e-01,   8.59128195e-04],
                          [  9.99890447e-01,   1.09594235e-04],
                          [  9.99986053e-01,   1.39711719e-05]]]]

  assert np.allclose(expected_z, val_z)
  assert np.allclose(expected_class_probs, val_class_probs)
  assert np.allclose(expected_all_probs, val_all_probs)
Esempio n. 17
0
def hierarchical_softmax_layer(input_var, label_index, label_dim, label_classes=None):
    '''
    A two layers hierarchical softmax function:

    Args:
        input_var: Variable with shape: [#,*](dim_x)
        label_index: index of label's category:  [#,*](1)
        label_dim: number of the label categories
        label_classes: number of classes of the label categories
            (defaults to ceil(sqrt(label_dim)))
    Returns:
        output_prob: the probability of the given label [#,*](1)
        class_probs: the probability of all the label classes [#,*](label_classes)
        all_probs: the probability of all label classes
    '''
    input_dim = input_var.shape[0]

    # default: square-root partition of the label space
    if not label_classes:
        label_classes = int(np.ceil(np.sqrt(float(label_dim))))

    n_outputs_per_class = int(np.ceil(label_dim / label_classes))

    # split a flat label index into (class, index within class)
    target_class = C.floor((label_index + 0.5) / n_outputs_per_class)
    target_output_in_class = C.round(label_index - target_class * n_outputs_per_class)

    w1 = parameter(shape=(input_dim, label_classes), init=C.glorot_normal(), name='hsoftmax_w1')
    b1 = parameter(shape=(label_classes), init=C.glorot_normal(), name='hsoftmax_b1')
    w2s = parameter(shape=(label_classes, input_dim, n_outputs_per_class,), init=C.glorot_normal(), name='hsoftmax_w2s')
    b2s = parameter(shape=(label_classes, n_outputs_per_class,), init=C.glorot_normal(), name='hsoftmax_b2s')

    # first level: softmax over classes
    class_probs = softmax(b1 + times(input_var, w1))

    # TODO: fix the bug in backprop for sparse, and use sparse embedding to accelerate
    # second level: select the target class's weights via one-hot matmul
    target_class_one_hot = C.one_hot(target_class, num_classes=label_classes, sparse_output=False)
    w2 = C.reshape(C.times(target_class_one_hot, w2s, output_rank=2), [input_dim, -1])
    b2 = C.reshape(times(target_class_one_hot, b2s, output_rank=1), [-1])
    probs_in_class = softmax(b2 + times(input_var, w2))

    # P(label) = P(output | class) * P(class)
    prob_in_class = C.times_transpose(C.one_hot(target_output_in_class, num_classes=n_outputs_per_class, sparse_output=False), probs_in_class)
    class_prob = C.times_transpose(C.one_hot(target_class, num_classes=label_classes, sparse_output=False), class_probs)
    output_prob = prob_in_class * class_prob

    # this is for calculating all the outputs' probabilities
    all_probs = []
    for i in range(label_classes):
        ci = C.constant(i)
        ci_one_hot = C.one_hot(ci, num_classes=label_classes, sparse_output=False)
        w2a = C.times(ci_one_hot, w2s, output_rank=2)
        b2a = C.times(ci_one_hot, b2s, output_rank=1)
        probs_in_classa = C.softmax(b2a + times(input_var, w2a))
        class_proba = C.times_transpose(ci_one_hot, class_probs)
        output_proba = probs_in_classa * class_proba
        all_probs.append(output_proba)

    return output_prob, class_probs, all_probs
Esempio n. 18
0
def test_clone_with_function_in_substitution_map():
    """clone() must accept a Function (not only a Variable) in the substitution map."""
    in_dim, out_dim = 1, 2
    x = C.input_variable((in_dim,))
    w = C.parameter((in_dim, out_dim))
    t = C.times(x, w)
    b = C.parameter((out_dim))
    affine = t + b

    ph = C.placeholder()
    # detach the times sub-graph behind a placeholder, then substitute it back
    detached = affine.clone('clone', {t : ph})
    reattached = detached.clone('share', {ph : t})
Esempio n. 19
0
def test_assign_to_param(input_data, device_id, precision):
    """C.assign must copy one parameter's value into another and return it."""
    dtype = PRECISION_TO_TYPE[precision]
    expected = AA(input_data, dtype=dtype)

    src = C.parameter(init=expected)
    dst = C.parameter(shape=expected.shape, dtype=dtype)

    result = C.assign(dst, src).eval()

    # destination now holds the source data, and eval returned the same data
    assert np.array_equal(dst.asarray(), expected)
    assert np.array_equal(result, expected)
Esempio n. 20
0
def cntk_baseline_basic():
    """Save CNTK baseline parameter values via crosstalk for test_cntk_basic.

    Relies on module-level globals defined elsewhere: shape1/shape2,
    param1/param2 (initial values) and workdir — NOTE(review): confirm they
    are set before this is called.
    """
    import cntk as C
    import cntk.contrib.crosstalk.crosstalk_cntk as crct
    ci = crct.instance

    p1 = C.parameter(shape1, init=param1)
    p2 = C.parameter(shape2, init=param2)
    ci.watch(p1, 'p1')
    ci.watch({'param1':p1, 'param2':p2}, 'p1_p2', var_type=crct.DictParameterType)
    ci.set_workdir(workdir)
    # persist both the single parameter and the dict-of-parameters entries
    ci.fetch('p1', save=True)
    ci.fetch('p1_p2', save=True)
    ci.reset()
Esempio n. 21
0
def test_0d_1d_parameter_set_value():
    """Assign parameter .value from NDArrayView gradients (scalar and vector)."""
    x = C.input_variable(2)

    # scalar (0-d) parameter: gradient of sum(x + w) w.r.t. w is 2
    scalar_w = C.parameter(())
    op = x + scalar_w
    grad = op.grad({x : np.asarray([1, 2], dtype=np.float32)},
                   wrt=[scalar_w], as_numpy=False)
    scalar_w.value = grad.data
    assert scalar_w.value == 2.

    # vector (1-d) parameter: per-element gradient is 1
    vec_w = C.parameter(shape=2)
    op = x + vec_w
    grad = op.grad({x : np.asarray([1, 2], dtype=np.float32)},
                   wrt=[vec_w], as_numpy=False)
    vec_w.value = grad.data
    assert np.array_equal(vec_w.value, [1., 1.])
Esempio n. 22
0
def test_free_static_axis_in_recurrence():
    """A free static axis (C.FreeDimension) must work inside a recurrence loop."""
    x = C.sequence.input_variable((C.FreeDimension, 2))
    out_placeholder = C.placeholder()
    out_past = C.sequence.past_value(out_placeholder)
    wh = C.parameter(init=np.asarray([[2, 5], [1, 3]], dtype=np.float32))
    wx = C.parameter(init=np.asarray([[1, 4], [2, 5]], dtype=np.float32))
    out = C.times(x, wx) + C.times(out_past, wh)
    # close the recurrence: feed the output back into its own past_value
    out.replace_placeholders({out_placeholder : out})

    x_data = np.asarray([[0.5, 0.2], [-0.7, 1.2]], np.float32)
    # request the forward output alongside the gradients to check both passes
    w_grad, out_val = out.grad({x : x_data}, wrt=[wh, wx], outputs=[out])
    assert np.allclose(out_val, [[[[0.9, 3.], [1.7, 3.2]]]])
    assert np.allclose(w_grad[wx], [[-0.2, -0.2], [1.4, 1.4]])
Esempio n. 23
0
def test_validation_before_eval():
    """eval() on a graph whose inferred dimensions were never resolved must raise."""
    w = C.parameter((4, C.InferredDimension))
    v = C.parameter((C.InferredDimension, 5))
    wv = C.times(w, v)

    # each parameter's free dimension gets pinned by a separate input binding...
    p = C.input((4, 1))
    wp = C.times(w, p)

    q = C.input((1, 5))
    qv = C.times(q, v)

    # ...but wv itself cannot be validated, so evaluating it must fail
    with pytest.raises(ValueError):
        wv.eval()
Esempio n. 24
0
def test_assign_dependency(input_data, device_id, precision):
    """assign() evaluated jointly with a dependent read in one combine().

    `y = dest + value` must see dest's pre-assign contents (result equals
    `data`, so dest presumably starts at zeros — default parameter init),
    while the assign still takes effect: afterwards dest holds `data` and
    re-evaluating y yields data + data.
    """
    dt = PRECISION_TO_TYPE[precision]
    data = AA(input_data, dtype=dt)

    value = C.parameter(init=data)
    dest = C.parameter(shape=data.shape, dtype=dt)
    assign_op = C.assign(dest, value)
    y = dest + value

    result = C.combine([y, assign_op]).eval()

    # y computed before the assign took effect
    assert np.array_equal(result[y.output], data)
    # the assign did land in dest
    assert np.array_equal(dest.asarray(), data)
    # a fresh eval of y now sees the updated dest
    assert np.array_equal(y.eval(), data + data)
Esempio n. 25
0
def test_as_composite():
    """as_composite must preserve the root function's name and be idempotent."""
    x = C.input_variable((1,))
    b = C.parameter((2))
    w = C.parameter((1, 2))
    func_name = 't_plus_b'
    t_plus_b = C.plus(C.times(x, w), b, name=func_name)
    assert t_plus_b.root_function.name == func_name

    # wrapping the root function, re-wrapping a composite, and wrapping the
    # function object itself must all keep the root name intact
    for candidate in (t_plus_b.root_function,
                      C.as_composite(t_plus_b.root_function),
                      t_plus_b):
        composite = C.as_composite(candidate)
        assert composite.root_function.name == func_name
Esempio n. 26
0
 def matching_attention_layer(self, attention_context):
     """Gated attention-based matching layer over the encoded context.

     Runs a bidirectional GRU whose per-step input is the original input
     spliced with an attention read, gated elementwise by sigmoid(x @ Wg) —
     NOTE(review): looks like the R-NET matching layer; confirm against the
     paper. Wrapped in as_block so the internal placeholder is bound to
     `attention_context`. Requires self.hidden_dim.
     """
     att_context = C.placeholder(shape=(2*self.hidden_dim,))
     #matching layer
     matching_model = C.layers.AttentionModel(attention_dim=self.hidden_dim, name='attention_model')
     #gate weight
     Wg = C.parameter(shape=(2*self.hidden_dim, 2*self.hidden_dim))
     #gru
     att_gru = C.layers.GRU(self.hidden_dim)
     @C.Function
     def out_func1(att_input, enc_input):
         enc_input2 = enc_input
         @C.Function
         def bigru_with_match(dh, x):
             # attend over att_input conditioned on the previous hidden state dh
             c_att = matching_model(att_input, dh)
             x = C.splice(x, c_att)
             # elementwise gate on the spliced input before the GRU step
             x = C.element_times(x, C.sigmoid(C.times(x, Wg)))
             return att_gru(dh, x)
         # forward and backward recurrences share the same step function
         return C.splice(C.layers.Recurrence(bigru_with_match)(enc_input2),
                     C.layers.Recurrence(bigru_with_match, go_backwards=True)(enc_input2),
                     name="bigru_with_match")
     match_context = out_func1(att_context, att_context)
     return C.as_block(
         match_context,
         [(att_context, attention_context)],
         'matching_attention_layer',
         'matching_attention_layer')
Esempio n. 27
0
def test_times_2d_sparse_operand(device_id):
    """times() with a 2-D sparse (one-hot) left operand, sequence and non-sequence.

    With an identity weight matrix, multiplying one-hot inputs must simply
    select the corresponding rows of w_init.
    """
    from .. import times

    dev = cntk_device(device_id)

    vocab_size = 6
    sample_shape = (2, vocab_size)
    # each sample is two one-hot rows; three samples in the batch
    input_sparse_indices = [[1, 3], [2, 4], [0, 2]]
    input_data = C.Value.one_hot(input_sparse_indices, sample_shape, device=dev)

    a = C.input_variable(shape=sample_shape, is_sparse=True, needs_gradient=True, name='a')
    # identity weights make the expected output equal to the selected rows
    w_init = np.eye(vocab_size, dtype=np.float32)
    w = C.parameter(init=w_init, device=dev)
    a_dense = times(a, w)

    # TODO: Also test the results from grad
    grad = a_dense.grad({a : input_data}, [w, a], as_numpy=False, device=dev)

    res = a_dense.eval({a : input_data}, device=dev)
    assert np.array_equal(res, [[w_init[input_sparse_indices[0]]], [w_init[input_sparse_indices[1]]], [w_init[input_sparse_indices[2]]]])

    # same check without a sequence axis (batch axis only)
    a_no_sequence = C.input_variable(shape=sample_shape, is_sparse=True, name='a', dynamic_axes=[C.Axis.default_batch_axis()])
    a_no_sequence_dense = times(a_no_sequence, w)
    res = a_no_sequence_dense.eval({a_no_sequence : input_data}, device=dev)
    assert np.array_equal(res, [w_init[input_sparse_indices[0]], w_init[input_sparse_indices[1]], w_init[input_sparse_indices[2]]])
Esempio n. 28
0
def test_gather_op(device_id, precision):
    """Verify C.gather forward values and gradients for 1- and 2-column index inputs."""
    a_data = [
        AA([[0], [1]], dtype=PRECISION_TO_TYPE[precision]),
        AA([[3], [4]], dtype=PRECISION_TO_TYPE[precision])
    ]
    a = C.input_variable((2, 1))
    r_data = np.arange(12).reshape(6, 2).astype('f')
    # BUG FIX: shape must be the array's shape tuple, not its raw buffer
    # (r_data.data is a memoryview, not a valid shape).
    r = C.parameter(shape=r_data.shape, init=r_data)
    res = C.gather(r, a).eval({a: a_data})
    expected = np.asarray([[[[0., 1.]], [[2., 3.]]], [[[6., 7.]], [[8., 9.]]]])
    assert np.array_equal(res, expected)

    # Gradient w.r.t. the table: 1 for each gathered row, 0 elsewhere.
    grads = C.gather(r, a).grad({a: a_data}, [r])
    expected_grad = np.asarray([[1, 1], [1, 1], [0, 0], [1, 1], [1, 1], [0, 0]],
                               dtype=np.float32)
    assert np.array_equal(grads, expected_grad)

    b_data = [
        AA([[0, 2], [1, 3]], dtype=PRECISION_TO_TYPE[precision]),
        AA([[2, 4], [3, 5]], dtype=PRECISION_TO_TYPE[precision])
    ]
    b = C.input_variable((2, 2))
    res2 = C.gather(r, b).eval({b: b_data})

    expected2 = np.asarray([[[[0., 1.], [4., 5.]], [[2., 3.], [6., 7.]]],
                            [[[4., 5.], [8., 9.]], [[6., 7.], [10., 11.]]]])
    assert np.array_equal(res2, expected2)
Esempio n. 29
0
def test_ext_backpropstate(payload):
    """Check that the state object returned by a UserFunction's forward pass
    is handed back unchanged to its backward pass."""
    class TestBackPropState(UserFunction):
        def __init__(self, arg, payload, name='f1'):
            self.payload = payload
            super(TestBackPropState, self).__init__([arg])

        def infer_outputs(self):
            # Output mirrors the single input's shape, dtype and dynamic axes.
            return [
                C.output_variable(self.inputs[0].shape, self.inputs[0].dtype,
                                  self.inputs[0].dynamic_axes)
            ]

        def forward(self, argument, device=None, outputs_to_retain=None):
            # First element of the tuple is the state handed to backward();
            # we smuggle the payload through it, second element is the output.
            return self.payload, argument

        def backward(self, state, root_gradients):
            # The exact object returned from forward() must come back here.
            assert state == self.payload
            return root_gradients

    dim = 4

    p = C.parameter(shape=(dim, ), init=10)
    in1 = C.input_variable(dim, needs_gradient=True, name='i_var')
    m = C.user_function(TestBackPropState(in1, payload))
    z = m + p

    lr_per_sample = C.learning_parameter_schedule(0.007, minibatch_size=1)
    trainer = C.Trainer(None, (z), [C.sgd(z.parameters, lr_per_sample)])

    # Each minibatch runs forward + backward, exercising the state round-trip.
    for i in range(100):
        input_data = np.random.rand(dim)
        trainer.train_minibatch({in1: [input_data]})
def test_output_subset_evaluation(device_id):
    """forward() on a subset of a combined function's outputs must not
    evaluate the unrequested branch."""
    try:
        gpu_device = C.gpu(0)
    except ValueError:
        pytest.skip('Test only runs when GPU available')

    device = cntk_device(device_id)
    x1 = C.input_variable(shape=())
    op1 = C.constant(value=1, shape=(1), device=device) + (
        C.constant(value=1, shape=(1), device=device) + x1)

    x2 = C.input_variable(shape=(1))

    # Deliberately locate the parameter on a different device than the
    # compute target, so any accidental use of op2 fails loudly.
    parameter_device = gpu_device if device.type() == 0 else C.cpu()
    p = C.parameter(shape=(1), init=C.glorot_uniform(), device=parameter_device)
    op2 = (x2 - C.constant(value=10, shape=(1), device=device)) - p

    op = C.combine([op1, op2])

    # Only op1 is requested; op2's mislocated parameter must never be touched.
    _, result = op.forward({x1: np.asarray([1, 2, 3])}, [op1], device=device)
    assert np.array_equal(result[op1], np.asarray([[3], [4], [5]]))
Esempio n. 31
0
def test_cntk_cudnn():
    """Compare a cuDNN optimized_rnnstack bi-LSTM against a baseline
    (TensorFlow if importable, otherwise a CNTK reference) via crosstalk."""
    try:
        import tensorflow
        has_tensorflow = True
    except:
        has_tensorflow = False

    # Produce the baseline under the shared crosstalk name 'cntk_birnn'.
    if has_tensorflow:
        tf_baseline_lstm()
    else:
        cntk_baseline_lstm()

    import cntk as C
    import cntk.contrib.crosstalk.crosstalk_cntk as crct
    ci = crct.instance

    # NOTE(review): in_dim, data_cntk, workdir, dim and cstk are module-level
    # globals defined elsewhere in this file.
    input_var = C.sequence.input(shape=(in_dim))
    data = {input_var:data_cntk}
    ci.set_data(data)
    ci.set_workdir(workdir)

    # -1 lets CNTK infer the packed cuDNN weight dimension.
    W = C.parameter((-1,dim,), init=C.glorot_uniform())
    cudnn_fwbw = C.optimized_rnnstack(input_var, W, dim, 1, bidirectional=True, recurrent_op='lstm')
    ci.watch(cudnn_fwbw, 'cntk_birnn_cudnn', var_type=cstk.RnnAttr,
          attr=cstk.RnnAttr(bidirectional=True, op_type='lstm', input_dim=in_dim, hidden_dim=dim, forget_bias=0))
    ci.watch(cudnn_fwbw, 'cntk_birnn_cudnn_out')

    # Load the baseline weights into the cuDNN model; outputs must match.
    ci.assign('cntk_birnn_cudnn', load=True, load_name='cntk_birnn')
    assert ci.compare('cntk_birnn_cudnn_out', compare_name='cntk_birnn_out')

    # Round-trip the cuDNN weights themselves (save, reload) and re-compare.
    ci.fetch('cntk_birnn_cudnn', save=True)
    ci.assign('cntk_birnn_cudnn', load=True)
    assert ci.compare('cntk_birnn_cudnn_out', compare_name='cntk_birnn_out')

    ci.reset()
def test_trainer(tmpdir, no_eval_function):
    """Train one minibatch, checkpoint with external state, and restore it."""
    in1 = C.input_variable(shape=(1,))
    labels = C.input_variable(shape=(1,))
    p = parameter(shape=(2,), init=10)
    z = plus(in1, reduce_sum(p), name='z')
    ce = cross_entropy_with_softmax(z, labels)
    errs = None if no_eval_function else classification_error(z, labels)

    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = C.learning_parameter_schedule(0.007, minibatch_size=1)
    learner = C.momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant, True)
    trainer = C.Trainer(z, (ce, errs), [learner])

    arguments = {in1: [[1], [2]], labels: [[0], [1]]}
    updated, var_map = trainer.train_minibatch(arguments, outputs=[z.output])

    ckpt_path = str(tmpdir / 'checkpoint.dat')
    external_state = {"additional external state":math.pi, "nested dict":{"a":"b"}, "list":[1,2,3]}
    trainer.save_checkpoint(ckpt_path, external_state)
    restored_state = trainer.restore_from_checkpoint(ckpt_path)

    # external state must round-trip through the checkpoint byte-for-byte
    assert external_state == restored_state

    assert trainer.model.name == 'z'

    # Ensure that Swig is not leaking raw types
    assert isinstance(trainer.model, Function)
    assert trainer.model.__doc__
    assert isinstance(trainer.parameter_learners[0], C.Learner)
Esempio n. 33
0
def test_learner_logging():
    """Each scheduled learning-rate and momentum value must be logged once."""
    from cntk import Trainer
    from cntk.logging import ProgressPrinter
    from cntk import cross_entropy_with_softmax, classification_error

    features = C.input_variable(shape=(1, ), needs_gradient=True, name='a')
    w = parameter(shape=(1, ), init=1)
    z = features * w
    labels = C.input_variable(shape=(1, ), name='b')
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)

    writer = TestProgressWriter()
    lr_values = [0.3, 0.2, 0.1, 0]
    m_values = [0.6, 0.7, 0.8]
    lr_schedule = learning_rate_schedule(lr_values, UnitType.sample, 1)
    m_schedule = C.momentum_schedule(m_values, 1)
    learner = C.momentum_sgd(z.parameters, lr_schedule, m_schedule)
    trainer = Trainer(z, (ce, errs), [learner], writer)

    for _ in range(10):
        trainer.train_minibatch({features: [[2.]], labels: [[1.]]})

    # one log entry per scheduled lr value plus per scheduled momentum value
    assert len(writer.log_output) == len(lr_values) + len(m_values)

    # values are logged interleaved: lr, momentum, lr, momentum, ...
    expected = [v for pair in zip(lr_values, m_values) for v in pair] + [0]
    for idx in range(len(expected)):
        assert expected[idx] == writer.log_output[idx]
Esempio n. 34
0
def create_fast_rcnn_predictor(conv_out, rois, fc_layers, cfg):
    """Fast R-CNN head: ROI pooling, fc layers, then class-score and
    bbox-regression outputs."""
    roi_dim = cfg["MODEL"].ROI_DIM
    roi_out = roipooling(conv_out, rois, cntk.MAX_POOLING, (roi_dim, roi_dim), spatial_scale=1/16.0)
    fc_out = fc_layers(roi_out)

    num_classes = cfg["DATA"].NUM_CLASSES

    # classification head
    W_pred = parameter(shape=(4096, num_classes), init=normal(scale=0.01), name="cls_score.W")
    b_pred = parameter(shape=num_classes, init=0, name="cls_score.b")
    cls_score = plus(times(fc_out, W_pred), b_pred, name='cls_score')

    # bounding-box regression head (4 coordinates per class)
    W_regr = parameter(shape=(4096, num_classes*4), init=normal(scale=0.001), name="bbox_regr.W")
    b_regr = parameter(shape=num_classes*4, init=0, name="bbox_regr.b")
    bbox_pred = plus(times(fc_out, W_regr), b_regr, name='bbox_regr')

    return cls_score, bbox_pred
Esempio n. 35
0
    def embed(self):
        """Build the word-embedding function: frozen GloVe+ELMo vectors for
        in-vocab words, plus a trainable matrix for the remaining ids.

        Returns:
            A callable mapping one-hot inputs (wg, wn) to embedding vectors.
        """
        # Row layout: first 300 dims from GloVe, the rest from the averaged
        # ELMo vectors stored in the HDF5 file.
        npglove = np.zeros((self.wg_dim, self.elmo_dim + self.hidden_dim),
                           dtype=np.float32)

        # BUG FIX: the HDF5 file was opened and never closed; use a context
        # manager so the handle is released once the table is built.
        with h5py.File(
                os.path.join(self.abs_path, self.data_config['elmo_embedding']),
                'r') as hf, \
             open(os.path.join(self.abs_path,
                               self.data_config['glove_embedding']),
                  encoding='utf-8') as f:
            for line in f:
                parts = line.split()
                word = parts[0].lower()
                if word in self.vocab:
                    try:
                        if len(parts) == 301:
                            npglove[self.vocab[word], :300] = np.asarray(
                                [float(p) for p in parts[-300:]])
                            npglove[self.vocab[word],
                                    300:] = np.average(hf[word][:], axis=0)
                    # BUG FIX: bare `except:` also swallowed SystemExit and
                    # KeyboardInterrupt; narrow to the expected failures
                    # (word missing from the ELMo file, malformed floats) and
                    # fall back to the <UNK> ELMo vector as before.
                    except (KeyError, ValueError):
                        npglove[self.vocab[word],
                                300:] = np.average(hf['<UNK>'][:], axis=0)

        glove = C.constant(npglove)
        nonglove = C.parameter(shape=(self.wn_dim,
                                      self.elmo_dim + self.hidden_dim),
                               init=C.glorot_uniform(),
                               name='TrainableE')

        def func(wg, wn):
            # frozen table for known words + trainable table for the rest
            return C.times(wg, glove) + C.times(wn, nonglove)

        return func
def test_empty_minibatch():
    """An empty argument map must be accepted by train_minibatch."""
    scalar = C.input_variable((1,), dtype=np.float32, name='tscalar')
    op = scalar + parameter(init=np.asarray([1]), dtype=np.float32)

    sgd_learner = C.sgd(op.parameters,
                        C.learning_parameter_schedule(0.1, minibatch_size=1))
    trainer = C.Trainer(op, (op, None), sgd_learner)
    trainer.train_minibatch({})
Esempio n. 37
0
def test_trainer(tmpdir, no_eval_function):
    """Train one minibatch, then save and restore a plain checkpoint."""
    in1 = input(shape=(1, ))
    labels = input(shape=(1, ))
    p = parameter(shape=(2, ), init=10)
    z = plus(in1, reduce_sum(p), name='z')
    ce = cross_entropy_with_softmax(z, labels)
    errs = None if no_eval_function else classification_error(z, labels)

    learner = momentum_sgd(z.parameters,
                           learning_rate_schedule(0.007, UnitType.sample),
                           momentum_as_time_constant_schedule(1100),
                           True)
    trainer = Trainer(z, (ce, errs), [learner])
    arguments = {in1: [[1], [2]], labels: [[0], [1]]}
    updated, var_map = trainer.train_minibatch(arguments, outputs=[z.output])

    ckpt_path = str(tmpdir / 'checkpoint.dat')
    trainer.save_checkpoint(ckpt_path)
    trainer.restore_from_checkpoint(ckpt_path)

    assert trainer.model.name == 'z'

    # Ensure that Swig is not leaking raw types
    assert isinstance(trainer.model, Function)
    assert trainer.model.__doc__
    assert isinstance(trainer.parameter_learners[0], Learner)
Esempio n. 38
0
def test_rnn(device_id):
    """Smoke-test fp16 gradients for an LSTM Recurrence and for
    optimized_rnnstack (GPU only)."""
    if device_id == -1:
        pytest.skip('Test only runs on GPU')

    batch_size = 8
    sequence_len = 100
    vocab_dim = 20
    embed_dim = 10
    hidden_dim = 7
    # Token ids are fed as float32 and cast to float16 for the fp16 graph.
    input = C.cast(C.sequence.input_variable(()), np.float16)
    with C.default_options(dtype=np.float16):
        embed = C.layers.Embedding(embed_dim)(C.one_hot(input,
                                                        num_classes=vocab_dim,
                                                        sparse_output=False))
        z = C.layers.Recurrence(C.layers.LSTM(hidden_dim))(embed)

    # random token ids in [0, vocab_dim - 1)
    feed = np.floor(
        np.random.rand(batch_size, sequence_len).astype(np.float32) *
        (vocab_dim - 1))
    z.grad(feed, wrt=z.parameters)

    # Same gradient check through the optimized_rnnstack path.
    num_layers = 2
    W = C.parameter((C.InferredDimension, embed_dim),
                    init=C.glorot_uniform(),
                    dtype=np.float16)
    with C.default_options(dtype=np.float16):
        z = C.optimized_rnnstack(embed, W, hidden_dim, num_layers)

    feed = np.floor(
        np.random.rand(batch_size, sequence_len).astype(np.float32) *
        (vocab_dim - 1))
    z.grad(feed, wrt=z.parameters)
Esempio n. 39
0
def test_OptimizedRNNStack(bidirectional, num_layers, input_size, hidden_size,
                           recurrent_op, tmpdir, device_id):
    """Build an optimized_rnnstack with deterministic weights and verify it
    through the model round-trip helper (GPU only)."""
    if device_id == -1:
        pytest.skip('Test only runs on GPU')
    dev = cntk_device(device_id)
    from _cntk_py import constant_initializer

    direction = 'bi' if bidirectional else 'uni'
    model_filename = ('optimized_rnn_stack_' + direction +
                      '_layers' + str(num_layers) +
                      '_inp' + str(input_size) +
                      '_hid' + str(hidden_size))
    W = C.parameter((C.InferredDimension, input_size),
                    constant_initializer(0.1),
                    device=dev)
    x = C.sequence.input_variable(shape=(input_size, ))
    s = np.asarray(np.random.uniform(-1, 1, (5, input_size)), dtype=np.float32)
    f = C.optimized_rnnstack(x,
                             W,
                             hidden_size,
                             num_layers,
                             bidirectional=bidirectional,
                             recurrent_op=recurrent_op,
                             name='MyRnnStack')
    # Make the packed cuDNN weights deterministic: 0, 1, 2, ...
    w_shape = f.parameters[0].value.shape
    f.parameters[0].value = np.arange(np.prod(w_shape),
                                      dtype=np.float32).reshape(w_shape)
    verify_one_input(f, s, tmpdir, model_filename)
Esempio n. 40
0
def create_fast_rcnn_predictor(conv_out, rois, fc_layers):
    """Fast R-CNN prediction heads (class scores + bbox regression) on top of
    ROI-pooled conv features."""
    # RCNN
    roi_out = roipooling(conv_out, rois, cntk.MAX_POOLING, (roi_dim, roi_dim), spatial_scale=1/16.0)
    fc_out = fc_layers(roi_out)

    num_classes = globalvars['num_classes']

    # prediction head
    W_pred = parameter(shape=(4096, num_classes), init=normal(scale=0.01), name="cls_score.W")
    b_pred = parameter(shape=num_classes, init=0, name="cls_score.b")
    cls_score = plus(times(fc_out, W_pred), b_pred, name='cls_score')

    # regression head (4 coordinates per class)
    W_regr = parameter(shape=(4096, num_classes * 4), init=normal(scale=0.001), name="bbox_regr.W")
    b_regr = parameter(shape=num_classes * 4, init=0, name="bbox_regr.b")
    bbox_pred = plus(times(fc_out, W_regr), b_regr, name='bbox_regr')

    return cls_score, bbox_pred
Esempio n. 41
0
def test_free_and_inferred_static_dimension():
    """A (FreeDimension, inferred) input binds its shape per call and rejects
    data that contradicts the already-inferred dimension."""
    x = C.input_variable((C.FreeDimension, -1))
    w = C.parameter(init=np.asarray([[2, 5], [1, 3]], dtype=np.float32))
    t = C.times(x, w)

    def run(rows):
        data = np.asarray(rows, np.float32)
        return t.grad({x: data}, wrt=[w], outputs=[t])

    w_grad, t_val = run([[0.5, 0.2]])
    assert np.array_equal(t_val, np.asarray([[[1.2, 3.1]]], dtype=np.float32))
    assert np.array_equal(w_grad,
                          np.asarray([[0.5, .5], [.2, .2]], dtype=np.float32))

    # the free dimension can grow on a later call
    w_grad, t_val = run([[0.5, 0.2], [0.1, .6]])
    assert np.allclose(
        t_val, np.asarray([[[1.2, 3.1], [0.8, 2.3]]], dtype=np.float32))
    assert np.array_equal(w_grad,
                          np.asarray([[0.6, .6], [.8, .8]], dtype=np.float32))

    # ... and shrink back again
    w_grad, t_val = run([[0.5, 0.2]])
    assert np.array_equal(t_val, np.asarray([[[1.2, 3.1]]], dtype=np.float32))
    assert np.array_equal(w_grad,
                          np.asarray([[0.5, .5], [.2, .2]], dtype=np.float32))

    # a sample contradicting the inferred static dimension is rejected
    with pytest.raises(ValueError):
        w_grad, t_val = run([[0.5, 0.2, 0.9]])
Esempio n. 42
0
def test_learner_logging():
    """Every scheduled learning-rate and momentum value is logged exactly once."""
    from cntk import Trainer
    from cntk.logging import ProgressPrinter
    from cntk import cross_entropy_with_softmax, classification_error

    features = C.input_variable(shape=(1,), needs_gradient=True, name='a')
    w = parameter(shape=(1,), init=1)
    z = features * w
    labels = C.input_variable(shape=(1,), name='b')
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)

    writer = TestProgressWriter()
    lr_values = [0.3, 0.2, 0.1, 0]
    m_values = [0.6, 0.7, 0.8]
    learner = C.momentum_sgd(z.parameters,
                             learning_rate_schedule(lr_values, UnitType.sample, 1),
                             C.momentum_schedule(m_values, 1))
    trainer = Trainer(z, (ce, errs), [learner], writer)

    minibatch = {features: [[2.]], labels: [[1.]]}
    for _ in range(10):
        trainer.train_minibatch(minibatch)

    # one logged value per scheduled lr plus per scheduled momentum
    assert len(writer.log_output) == len(lr_values) + len(m_values)

    # logged interleaved: lr, momentum, lr, momentum, ..., final lr of 0
    interleaved = [v for pair in zip(lr_values, m_values) for v in pair] + [0]
    for idx, expected in enumerate(interleaved):
        assert expected == writer.log_output[idx]
Esempio n. 43
0
def test_times_2d_sparse_operand(device_id):
    """times() with a sparse 2D operand using the (deprecated) sequence.input
    and input APIs."""
    from .. import times

    dev = cntk_device(device_id)

    vocab_size = 6
    sample_shape = (2, vocab_size)
    input_sparse_indices = [[1, 3], [2, 4], [0, 2]]
    input_data = C.Value.one_hot(input_sparse_indices, sample_shape, device=dev)

    a = C.sequence.input(shape=sample_shape, is_sparse=True, needs_gradient=True, name='a')
    w_init = np.eye(vocab_size, dtype=np.float32)
    w = C.parameter(init=w_init, device=dev)
    a_dense = times(a, w)

    # TODO: Also test the results from grad
    grad = a_dense.grad({a: input_data}, [w, a], as_numpy=False, device=dev)

    # one-hot rows times an identity matrix recover the selected rows
    res = a_dense.eval({a: input_data}, device=dev)
    assert np.array_equal(res, [[w_init[idx]] for idx in input_sparse_indices])

    a_no_sequence = C.input(shape=sample_shape, is_sparse=True, name='a')
    a_no_sequence_dense = times(a_no_sequence, w)
    res = a_no_sequence_dense.eval({a_no_sequence: input_data}, device=dev)
    assert np.array_equal(res, [w_init[idx] for idx in input_sparse_indices])
Esempio n. 44
0
    def session(is_sparse):
        """Run a fixed minibatch schedule (dense or sparse input) and return
        the learned weight matrix.

        NOTE(review): `learner`, `checkpoint` and `tmpdir` are captured from
        the enclosing test's scope, which is outside this view.
        """
        x = C.input_variable((200,), is_sparse=is_sparse)
        w = C.parameter((200, 100))
        y = C.times(x, w)

        # Deterministic 0/1 pattern: pseudo-randomly shuffle 100 zeros
        # followed by 100 ones.
        z = [0] * 100 + [1] * 100
        for i in range(200):
            j = (3 * i * i + 5 * i + 1) % 200  # just a random looking index
            z[i], z[j] = z[j], z[i]

        import scipy.sparse
        x11 = scipy.sparse.csr_matrix(np.array([1] * 200).astype('f'))
        x01 = scipy.sparse.csr_matrix(np.array(z).astype('f'))

        t = C.Trainer(y, y, learner(y.parameters))

        # start from all-zero weights so runs are comparable
        w.value = 0 * w.value
        t.train_minibatch({x: [x11]})
        t.train_minibatch({x: [x01]})
        t.train_minibatch({x: [x01]})
        if checkpoint:
            # Train three extra steps, then roll back: the final weights must
            # be as if the extra steps never happened.
            t.save_checkpoint(str(tmpdir.join('checkpoint')))
            t.train_minibatch({x: [x11]})
            t.train_minibatch({x: [x01]})
            t.train_minibatch({x: [x01]})
            t.restore_from_checkpoint(str(tmpdir.join('checkpoint')))
        t.train_minibatch({x: [x01]})
        t.train_minibatch({x: [x01]})
        t.train_minibatch({x: [x11]})
        return w.value
Esempio n. 45
0
    def __init__(self, n_in, n_out, init_lr, momentum):
        """Build the DNN model graph, loss and momentum-SGD trainer.

        Args:
            n_in: input feature dimension per sequence step.
            n_out: output/label dimension.
            init_lr: per-sample learning rate.
            momentum: momentum value for momentum_sgd.
        """
        self.param1 = 512   # width of the three shared tanh layers
        self.param2 = 256   # bias width; dense branches emit 8 * param2 units

        self.n_in = int(n_in)
        self.n_out = int(n_out)
        self.input = C.sequence.input_variable(shape=(self.n_in,))
        self.label = C.sequence.input_variable(shape=(self.n_out,))

        self.three_dnn = C.layers.Sequential([
            C.layers.Dense(self.param1, activation=C.tanh, name='dnn_three_1'),
            C.layers.Dense(self.param1, activation=C.tanh, name='dnn_three_2'),
            C.layers.Dense(self.param1, activation=C.tanh, name='dnn_three_3')])
        self.final_dnn = C.layers.Dense(self.n_out, name='dnn_final')
        self.dnn_1 = C.layers.Dense(8 * self.param2, bias=False, name='dnn_1')
        self.dnn_2 = C.layers.Dense(8 * self.param2, bias=False, name='dnn_2')
        self.dnn_3 = C.layers.Dense(8 * self.param2, bias=False, name='dnn_3')
        self.dnn_4 = C.layers.Dense(8 * self.param2, bias=False, name='dnn_4')
        # BUG FIX: xrange is Python 2 only; use range for Python 3.
        self.list_bias = []
        for i in range(16):
            self.list_bias.append(C.parameter(shape=(self.param2, ), name='bias_' + str(i)))

        self.output = self.model(self.input)

        # NOTE(review): eval_err reuses loss_fun, so the reported "error" is
        # the loss itself, not a classification error — confirm intentional.
        self.loss = loss_fun(self.output, self.label)
        self.eval_err = loss_fun(self.output, self.label)

        self.lr_s = C.learning_rate_schedule(init_lr, C.UnitType.sample)
        self.mom_s = C.momentum_schedule(momentum)
        self.learner = C.momentum_sgd(self.output.parameters, lr=self.lr_s, momentum=self.mom_s)
        self.trainer = C.Trainer(self.output, (self.loss, self.eval_err), [self.learner])
Esempio n. 46
0
    def session(is_sparse):
        """Train on a fixed minibatch schedule and return the final weights.

        NOTE(review): `learner`, `checkpoint` and `tmpdir` come from the
        enclosing test's scope (not visible here).
        """
        x = C.input_variable((200, ), is_sparse=is_sparse)
        w = C.parameter((200, 100))
        y = C.times(x, w)

        # Build a deterministic 0/1 pattern by shuffling 100 zeros + 100 ones
        # with a fixed quadratic index sequence.
        z = [0] * 100 + [1] * 100
        for i in range(200):
            j = (3 * i * i + 5 * i + 1) % 200  # just a random looking index
            z[i], z[j] = z[j], z[i]

        import scipy.sparse
        x11 = scipy.sparse.csr_matrix(np.array([1] * 200).astype('f'))
        x01 = scipy.sparse.csr_matrix(np.array(z).astype('f'))

        t = C.Trainer(y, y, learner(y.parameters))

        w.value = 0 * w.value  # zero the weights for a reproducible run
        t.train_minibatch({x: [x11]})
        t.train_minibatch({x: [x01]})
        t.train_minibatch({x: [x01]})
        if checkpoint:
            # Extra steps after the checkpoint must be undone by the restore.
            t.save_checkpoint(str(tmpdir.join('checkpoint')))
            t.train_minibatch({x: [x11]})
            t.train_minibatch({x: [x01]})
            t.train_minibatch({x: [x01]})
            t.restore_from_checkpoint(str(tmpdir.join('checkpoint')))
        t.train_minibatch({x: [x01]})
        t.train_minibatch({x: [x01]})
        t.train_minibatch({x: [x11]})
        return w.value
Esempio n. 47
0
def test_empty_minibatch():
    """train_minibatch must accept an empty argument map."""
    scalar = input((1, ), dtype=np.float32, name='tscalar')
    op = scalar + parameter(init=np.asarray([1]), dtype=np.float32)

    learner = sgd(op.parameters, learning_rate_schedule(0.1, UnitType.sample))
    trainer = Trainer(op, (op, None), learner)
    trainer.train_minibatch({})
def test_cntk_embed():
    """Round-trip an embedding parameter through crosstalk assign/compare."""
    try:
        import tensorflow
        has_tensorflow = True
    except:
        has_tensorflow = False

    # Produce the baseline embedding under the shared crosstalk name 'embed'.
    if has_tensorflow:
        tf_baseline_embed()
    else:
        cntk_baseline_embed()

    import cntk as C
    import cntk.contrib.crosstalk.crosstalk_cntk as crct
    ci = crct.instance
    ci.set_workdir(workdir)

    # NOTE(review): input_dim, emb_dim, dict2, emb1, emb2, workdir and cstk
    # are module-level globals defined elsewhere in this file.
    embed = C.parameter((
        input_dim,
        emb_dim,
    ))
    ci.watch(embed,
             'embed',
             var_type=cstk.EmbedAttr,
             attr=cstk.EmbedAttr(dict=dict2, input_dim=input_dim))

    # Loading the saved baseline must reproduce its values (emb2) exactly.
    ci.assign('embed', load=True)
    assert np.isclose(emb2, embed.value).all()

    # test assign with value
    ci.assign('embed', value={'a': emb1[0], 'b': emb1[1], 'c': emb1[2]})

    ci.reset()
Esempio n. 49
0
def OptimizedRnnStack(hidden_dim,
                      num_layers=1,
                      recurrent_op='gru',
                      bidirectional=False,
                      use_cudnn=True,
                      name=''):
    """Factory: a cuDNN optimized_rnnstack when use_cudnn, otherwise a
    bidirectional GRU recurrence built from layers primitives."""
    if not use_cudnn:
        def gru_birnn(x):
            fwd = C.layers.Recurrence(C.layers.GRU(hidden_dim))(x)
            bwd = C.layers.Recurrence(C.layers.GRU(hidden_dim),
                                      go_backwards=True)(x)
            return C.splice(fwd, bwd, name=name)

        return gru_birnn

    # packed cuDNN weight; leading dims inferred at first use
    W = C.parameter(_INFERRED + (hidden_dim, ), init=C.glorot_uniform())

    def cudnn_rnn(x):
        return C.optimized_rnnstack(x,
                                    W,
                                    hidden_dim,
                                    num_layers,
                                    bidirectional,
                                    recurrent_op=recurrent_op,
                                    name=name)

    return cudnn_rnn
Esempio n. 50
0
    def word_glove(self):
        """Return a CNTK function embedding words: frozen GloVe vectors for
        in-vocab ids, a trainable matrix for the rest. The built function is
        cached on disk as 'glove300.model' and reloaded on later calls."""
        # load glove
        if os.path.isfile('glove300.model'):
            print('[BUILD] load glove300.model')
            return C.load_model('glove300.model')
        npglove = np.zeros((self.wg_dim, self.word_emb_dim), dtype=np.float32)
        with open(os.path.join(self.abs_path, self.word_embed_file),
                  encoding='utf-8') as f:
            for line in f:
                parts = line.split()
                word = parts[0].lower()
                # only ids below wg_dim use the pretrained (frozen) table
                if self.vocab.get(word, self.wg_dim) < self.wg_dim:
                    npglove[self.vocab[word], :] = np.asarray(
                        [float(p) for p in parts[-300:]])
        glove = C.constant(npglove)
        nonglove = C.parameter(shape=(len(self.vocab) - self.wg_dim,
                                      self.word_emb_dim),
                               init=C.glorot_uniform(),
                               name='TrainableE')

        @C.Function
        def func(wg, wn):
            # frozen table + trainable table, selected via one-hot inputs
            return C.times(wg, glove) + C.times(wn, nonglove)

        func.save('glove300.model')
        print('[BUILD] save glove300.model')
        return func
Esempio n. 51
0
def test_scalar_input():
    """A (1,)-shaped 'scalar' input accepts a (batch, 1) numpy feed."""
    scalar = C.input_variable((1,), dtype=np.float32, name='tscalar')
    op = scalar + parameter(init=np.asarray([1]), dtype=np.float32)

    learner = C.sgd(op.parameters,
                    C.learning_rate_schedule(0.1, C.UnitType.sample))
    trainer = C.Trainer(op, (op, None), learner)
    trainer.train_minibatch({scalar: np.zeros((2, 1), dtype=np.float32)})
Esempio n. 52
0
 def create_model(self):
     """Build a sparse-input embedding model: z = reduce_sum(one_hot · p)."""
     self.input_dim = 1000
     self.embed_dim = 30
     i = C.input_variable((self.input_dim, ), is_sparse=True)
     # dense embedding table multiplied by the sparse one-hot input
     self.p = C.parameter(shape=(self.input_dim, self.embed_dim), init=1)
     o = C.times(i, self.p)
     self.z = C.reduce_sum(o)
def test_data_type_inference():
    """A DataType_Unknown parameter inherits its dtype from the operand it is
    combined with."""
    x_float = C.input_variable((1,), dtype=np.float64)
    param1 = C.parameter((C.InferredDimension, 1),
                         init=C.glorot_uniform(),
                         dtype=C.cntk_py.DataType_Unknown)
    assert param1.get_data_type() == C.cntk_py.DataType_Unknown

    # binding to a float64 operand resolves the parameter's dtype
    x_times_param1 = C.times(x_float, param1)
    assert param1.dtype == np.float64
Esempio n. 54
0
def test_trainer(tmpdir, no_eval_function):
    """Train a minibatch, then checkpoint and restore the trainer state."""
    in1 = input_variable(shape=(1,))
    labels = input_variable(shape=(1,))
    p = parameter(shape=(2,), init=10)
    z = plus(in1, reduce_sum(p), name='z')
    ce = cross_entropy_with_softmax(z, labels)
    errs = None if no_eval_function else classification_error(z, labels)

    learner = momentum_sgd(z.parameters,
                           learning_rate_schedule(0.007, UnitType.sample),
                           momentum_as_time_constant_schedule(1100),
                           True)
    trainer = Trainer(z, (ce, errs), [learner])
    arguments = {in1: [[1], [2]], labels: [[0], [1]]}
    updated, var_map = trainer.train_minibatch(arguments, outputs=[z.output])

    checkpoint_path = str(tmpdir / 'checkpoint.dat')
    trainer.save_checkpoint(checkpoint_path)
    trainer.restore_from_checkpoint(checkpoint_path)

    assert trainer.model.name == 'z'

    # Ensure that Swig is not leaking raw types
    assert isinstance(trainer.model, Function)
    assert trainer.model.__doc__
    assert isinstance(trainer.parameter_learners[0], Learner)
Esempio n. 55
0
def test_usermbsource_training(tmpdir):
    """Drive a training_session from a user-defined minibatch source."""
    input_dim = 1000
    num_output_classes = 5

    mbs = MyDataSource(input_dim, num_output_classes)

    from cntk import sequence, parameter, plus, cross_entropy_with_softmax, \
            classification_error, learning_rate_schedule, sgd, Trainer, \
            training_session, times, UnitType, input

    feature = sequence.input(shape=(input_dim, ))
    label = input(shape=(num_output_classes, ))
    p = parameter(shape=(input_dim, num_output_classes), init=10)
    z = times(sequence.reduce_sum(feature), p, name='z')
    ce = cross_entropy_with_softmax(z, label)
    errs = classification_error(z, label)

    learner = sgd(z.parameters,
                  learning_rate_schedule([0.3, 0.2, 0.1, 0.0],
                                         UnitType.sample))
    trainer = Trainer(z, (ce, errs), [learner])
    input_map = {feature: mbs.fsi, label: mbs.lsi}

    session = training_session(trainer=trainer,
                               mb_source=mbs,
                               model_inputs_to_streams=input_map,
                               mb_size=4,
                               max_samples=20)
    session.train()

    # the session must stop exactly at max_samples
    assert trainer.total_number_of_samples_seen == 20
Esempio n. 56
0
    def gated_attention_gru_layer(self, context, query):
        """Gated attention GRU: each context step is spliced with its
        query-attention vector, gated element-wise by a sigmoid, then fed to
        a GRU recurrence."""
        q_processed = C.placeholder(shape=(2*self.hidden_dim,))
        c_processed = C.placeholder(shape=(2*self.hidden_dim,))

        #gate weight
        Wg = C.parameter(shape=(4*self.hidden_dim, 4*self.hidden_dim))
        att_gru = C.layers.GRU(2*self.hidden_dim)
        attention_model = C.layers.AttentionModel(self.hidden_dim, name='attention_model')
        
        @C.Function
        def out_func0(att_input, enc_input):
            enc_input2 = enc_input
            @C.Function
            def gru_with_attentioin(dh, x):
                # attention over att_input conditioned on the current step
                c_att = attention_model(att_input, x)
                x = C.splice(x, c_att)
                # element-wise gate: x * sigmoid(x @ Wg)
                x = C.element_times(x, C.sigmoid(C.times(x, Wg)))
                return att_gru(dh, x)
            att_context = Recurrence(gru_with_attentioin)(enc_input2)
            return att_context
        att_context = out_func0(q_processed, c_processed)
        # re-bind the placeholders to the actual context/query inputs
        return C.as_block(
            att_context,
            [(c_processed, context), (q_processed, query)],
            'gated_attention_gru_layer',
            'gated_attention_gru_layer')
Esempio n. 57
0
    def create_trainer(use_sparse, device):
        """Build a simple recurrent softmax model (dense or sparse input) and
        its SGD trainer.

        NOTE(review): input_shape, label_shape, w_init_i, w_init_h and dev are
        captured from the enclosing scope (not visible in this view).
        """
        a = C.sequence.input(shape=input_shape, is_sparse=use_sparse, name='input')
        w_i = C.parameter(init=w_init_i, device=dev)
        a_projection = times(a, w_i)

        p_o = C.placeholder()
        h = C.sequence.past_value(p_o)
        w_h = C.parameter(init=w_init_h, device=dev)
        h_projection = times(h, w_h)        
        z = a_projection + h_projection
        # close the recurrence: the placeholder now refers to z's own output
        z = z.replace_placeholder(z)
        z = reshape(z, label_shape)

        l = C.sequence.input(shape=label_shape, is_sparse=use_sparse, name='label')
        loss = cross_entropy_with_softmax(z, l, axis=-1)
        trainer = C.Trainer(z, (loss, None), C.sgd(z.parameters, lr=C.learning_rate_schedule(0.7, C.UnitType.sample)))
        return (a, l, w_i, w_h, trainer)
Esempio n. 58
0
def create_fast_rcnn_predictor(conv_out, rois, fc_layers, cfg):
    """Fast R-CNN prediction heads on top of ROI-pooled conv features."""
    roi_dim = cfg["MODEL"].ROI_DIM
    roi_out = roipooling(conv_out, rois, cntk.MAX_POOLING, (roi_dim, roi_dim),
                         spatial_scale=1 / 16.0)
    fc_out = fc_layers(roi_out)

    num_classes = cfg["DATA"].NUM_CLASSES

    # classification head
    W_pred = parameter(shape=(4096, num_classes), init=normal(scale=0.01), name="cls_score.W")
    b_pred = parameter(shape=num_classes, init=0, name="cls_score.b")
    cls_score = plus(times(fc_out, W_pred), b_pred, name='cls_score')

    # bounding-box regression head (4 coordinates per class)
    W_regr = parameter(shape=(4096, num_classes * 4), init=normal(scale=0.001), name="bbox_regr.W")
    b_regr = parameter(shape=num_classes * 4, init=0, name="bbox_regr.b")
    bbox_pred = plus(times(fc_out, W_regr), b_regr, name='bbox_regr')

    return cls_score, bbox_pred
Esempio n. 59
0
def GenSimpleScan():
    """Export a simple scan model (recurrent sum with a learned bias) together
    with one random input/output sample."""
    feature = C.sequence.input_variable((128, ), np.float32)
    param = C.parameter(shape=(1, ), dtype=np.float32)

    def step(h, x):
        return x + h + param

    scan = C.layers.Recurrence(step)(feature)
    model = C.sequence.reduce_sum(scan)
    data_feature = np.random.rand(1, 64, 128).astype(np.float32)
    data_output = np.asarray(model.eval(data_feature), dtype=np.float32)
    Save('test_SimpleScan', model, data_feature, data_output)
Esempio n. 60
0
def test_0d_1d_parameter_set_value():
    """Parameter.value can be assigned directly from an (as_numpy=False)
    gradient NDArrayView, for both scalar and 1-D parameters."""
    x = C.input_variable(2)
    x_feed = {x: np.asarray([1, 2], dtype=np.float32)}

    # scalar (0-d) parameter: gradient of sum over 2 elements is 2
    w_0d = C.parameter(())
    grad_0d = (x + w_0d).grad(x_feed, wrt=[w_0d], as_numpy=False)
    w_0d.value = grad_0d.data
    assert w_0d.value == 2.

    # 1-d parameter: per-element gradient is 1
    w_1d = C.parameter(shape=2)
    grad_1d = (x + w_1d).grad(x_feed, wrt=[w_1d], as_numpy=False)
    w_1d.value = grad_1d.data
    assert np.array_equal(w_1d.value, [1., 1.])