예제 #1
0
def test_slice_with_inferred_static_axis():
    """Splicing along axis 0 must keep inferred dimensions inferred.

    A zero constant with a fixed leading dimension is spliced in front of,
    and then behind, an input whose first two axes are inferred. In both
    orders the resulting shape is expected to report ``-1`` for those axes.
    """
    inferred = C.InferredDimension
    x = C.input_variable(shape=(inferred, inferred, 3))
    padding_shape = (3, inferred, 3)
    expected_shape = (-1, -1, 3)

    padded_front = C.splice(
        C.constant(value=0, shape=padding_shape), x, axis=0)
    assert padded_front.shape == expected_shape

    padded_back = C.splice(
        x, C.constant(value=0, shape=padding_shape), axis=0)
    assert padded_back.shape == expected_shape
예제 #2
0
def test_batchnorm(device_id):
    """Smoke-test the batch-normalization gradient with a float16 input.

    The input variable is float16 while scale, bias and the running
    statistics are created with ``dtype='float'``. The test only verifies
    that computing the gradient w.r.t. scale and bias does not raise.
    Skipped when ``device_id`` is -1.
    """
    if device_id == -1:
        pytest.skip('Test only runs on GPU')

    stat_shape = (3, )
    features = C.input_variable(stat_shape, dtype='float16')
    scale = C.parameter(stat_shape, init=1, dtype='float')
    bias = C.parameter(stat_shape, init=2, dtype='float')
    running_mean = C.constant(3, shape=stat_shape, dtype='float')
    running_variance = C.constant(4, shape=stat_shape, dtype='float')
    running_count = C.constant(0, shape=(), dtype='float')

    bn_options = {
        'running_count': running_count,
        'spatial': False,
        'normalization_time_constant': 5000,
        'blend_time_constant': 0,
        'epsilon': 0.00001,
        'use_cudnn_engine': True,
        'disable_regularization': True,
    }
    bn = C.batch_normalization(features, scale, bias, running_mean,
                               running_variance, **bn_options)

    sample = AA([[1, 2, 3]]).astype(np.float16)
    bn.grad(sample, wrt=[scale, bias])
예제 #3
0
def BatchNormalizationTester(map_rank=1,
                             init_scale=1,
                             init_bias=0,
                             normalization_time_constant=5000,
                             blend_time_constant=0,
                             epsilon=0.00001,
                             use_cntk_engine=True,
                             norm_shape=(),
                             init_mean=None,
                             init_variance=None,
                             name=''):
    """Build a batch-normalization callable with settable mean and variance.

    The scale/bias parameters and the running mean/variance/count constants
    are created once here and captured by the returned function, so every
    application of the returned function shares them.

    Spatial normalization is requested when ``map_rank == 1``; the cuDNN
    engine is requested when ``use_cntk_engine`` is false. The ``name``
    argument is accepted but not used by this implementation.

    Returns:
        A function ``batch_normalize(x)`` applying ``batch_normalization``
        to ``x`` with the objects and settings bound here.
    """
    # Objects bound to the returned function.
    scale = parameter(shape=norm_shape, init=init_scale, name='scale')
    bias = parameter(shape=norm_shape, init=init_bias, name='bias')
    aggregate_mean = constant(
        shape=norm_shape, value=init_mean, name='aggregate_mean')
    aggregate_variance = constant(
        shape=norm_shape, value=init_variance, name='aggregate_variance')
    aggregate_count = constant(0, shape=(), name='aggregate_count')

    bn_settings = dict(
        running_count=aggregate_count,
        spatial=(map_rank == 1),
        normalization_time_constant=normalization_time_constant,
        blend_time_constant=blend_time_constant,
        epsilon=epsilon,
        use_cudnn_engine=not use_cntk_engine,
    )

    def batch_normalize(x):
        return batch_normalization(x, scale, bias, aggregate_mean,
                                   aggregate_variance, **bn_settings)

    return batch_normalize
예제 #4
0
def test_output_subset_evaluation(device_id):
    """Evaluate only one output of a combined function.

    ``op2`` uses a parameter that is deliberately placed on a device other
    than the compute device. Requesting only ``op1`` from the combined
    function must therefore succeed without touching that parameter.
    Skipped when no GPU can be obtained.
    """
    try:
        gpu_device = C.gpu(0)
    except ValueError:
        pytest.skip('Test only runs when GPU available')

    device = cntk_device(device_id)

    def on_device_constant(value):
        return C.constant(value=value, shape=(1), device=device)

    x1 = C.input_variable(shape=())
    op1 = on_device_constant(1) + (on_device_constant(1) + x1)

    x2 = C.input_variable(shape=(1))

    # The parameter lives on the "other" device on purpose: any attempt to
    # actually use it during evaluation would result in an error.
    parameter_device = gpu_device if device.type() == 0 else C.cpu()
    p = C.parameter(shape=(1),
                    init=C.glorot_uniform(),
                    device=parameter_device)
    op2 = (x2 - on_device_constant(10)) - p

    op = C.combine([op1, op2])

    feed = {x1: np.asarray([1, 2, 3])}
    _, result = op.forward(feed, [op1], device=device)
    expected = np.asarray([[3], [4], [5]])
    assert np.array_equal(result[op1], expected)
예제 #5
0
def test_conv_with_freedim_model(tmpdir):
    """Round-trip a free-dimension conv model through ONNX.

    Builds two conv -> relu -> max-pool stages on an input with free spatial
    dimensions, saves the model as ONNX, reloads it, and checks that shape
    and evaluation results match. The reloaded model is then saved again in
    both ONNX and CNTKv2 formats.
    """
    img_shape = (3, 32, 32)
    img = np.asarray(np.random.uniform(-1, 1, img_shape), dtype=np.float32)

    x = C.input_variable((3, C.FreeDimension, C.FreeDimension))

    def conv_relu_maxpool(kernel_shape, inputs):
        # Kernel weights are simply 0..N-1 laid out in the kernel shape.
        weights = np.arange(
            np.prod(kernel_shape), dtype=np.float32).reshape(kernel_shape)
        conv = C.convolution(C.constant(value=weights), inputs,
                             auto_padding=(False, True, True))
        return C.pooling(C.relu(conv), C.MAX_POOLING, (2, 2), (2, 2))

    first_stage = conv_relu_maxpool((32, 3, 5, 5), x)
    root_node = conv_relu_maxpool((64, 32, 3, 3), first_stage)

    out_dir = str(tmpdir)
    filename = os.path.join(out_dir, 'conv_with_freedim.onnx')
    root_node.save(filename, format=C.ModelFormat.ONNX)

    loaded_node = C.Function.load(filename, format=C.ModelFormat.ONNX)
    assert root_node.shape == loaded_node.shape

    x_ = loaded_node.arguments[0]
    loaded_result = loaded_node.eval({x_: img})
    original_result = root_node.eval({x: img})
    assert np.allclose(loaded_result, original_result)

    # The reloaded model must itself be saveable as ONNX and as CNTKv2.
    filename2 = os.path.join(out_dir, 'conv_with_freedim2.onnx')
    loaded_node.save(filename2, format=C.ModelFormat.ONNX)

    filename3 = os.path.join(out_dir, 'conv_with_freedim2.cntkmodel')
    loaded_node.save(filename3, format=C.ModelFormat.CNTKv2)
예제 #6
0
def test_slice_with_inferred_static_axis():
    """Check that splice along axis 0 preserves inferred static axes.

    The input has two inferred axes followed by a fixed axis of size 3. A
    zero-valued constant is spliced before and after it; either way the
    output shape should still be ``(-1, -1, 3)``.
    """
    free = C.InferredDimension
    x = C.input_variable(shape=(free, free, 3))
    pad_shape = (3, free, 3)
    want = (-1, -1, 3)

    zeros_then_x = C.splice(C.constant(value=0, shape=pad_shape), x, axis=0)
    assert zeros_then_x.shape == want

    x_then_zeros = C.splice(x, C.constant(value=0, shape=pad_shape), axis=0)
    assert x_then_zeros.shape == want
예제 #7
0
def test_output_subset_evaluation(device_id):
    """Forward only a subset of the outputs of a combined function.

    The second output depends on a parameter stored on a different device
    than the one used for computation, so the forward pass can only succeed
    if evaluation is restricted to the requested first output.
    Skipped when no GPU can be obtained.
    """
    try:
        gpu_device = C.gpu(0)
    except ValueError:
        pytest.skip('Test only runs when GPU available')

    device = cntk_device(device_id)

    x1 = C.input_variable(shape=())
    outer_one = C.constant(value=1, shape=(1), device=device)
    inner_sum = C.constant(value=1, shape=(1), device=device) + x1
    op1 = outer_one + inner_sum

    x2 = C.input_variable(shape=(1))

    # Put the parameter on whichever device is NOT the compute device, so
    # that actually using it would result in an error.
    compute_device_type = device.type()
    if compute_device_type == 0:
        parameter_device = gpu_device
    else:
        parameter_device = C.cpu()
    p = C.parameter(
        shape=(1), init=C.glorot_uniform(), device=parameter_device)
    ten = C.constant(value=10, shape=(1), device=device)
    op2 = (x2 - ten) - p

    op = C.combine([op1, op2])

    inputs = {x1: np.asarray([1, 2, 3])}
    _, result = op.forward(inputs, [op1], device=device)
    assert np.array_equal(result[op1], np.asarray([[3], [4], [5]]))
예제 #8
0
def test_op_batch_normalization_spatial_shape_inference(channels, input_size, device_id, precision):
    """Spatial batch norm must infer the per-channel parameter shapes.

    Scale, bias and the running mean/variance are created with an inferred
    dimension. After the batch-normalization node is constructed on an input
    of shape ``(channels, input_size, input_size)``, each of them is expected
    to have shape ``(channels,)``.
    """
    dtype = PRECISION_TO_TYPE[precision]
    dev = cntk_device(device_id)

    spatial = True
    epsilon = 0.01
    init_scale, init_bias, init_mean, init_var, init_count = 1, 2, 3, 4, 2

    input_shape = (channels, input_size, input_size)
    param_shape = (C.InferredDimension,)

    def make_parameter(init):
        return C.parameter(param_shape, init=init, dtype=dtype, device=dev)

    def make_constant(value, shape):
        return C.constant(value, shape=shape, dtype=dtype, device=dev)

    i = C.input_variable(input_shape, dtype=dtype)
    scale = make_parameter(init_scale)
    bias = make_parameter(init_bias)
    run_mean = make_constant(init_mean, param_shape)
    run_var = make_constant(init_var, param_shape)
    run_count = make_constant(init_count, ())

    # The node itself is not evaluated; constructing it is what triggers
    # the shape inference checked below.
    bn = C.batch_normalization(
        i, scale, bias, run_mean, run_var, spatial,
        normalization_time_constant=-1,
        epsilon=epsilon,
        running_count=run_count)

    expected_shape = (channels,)
    for inferred in (scale, bias, run_mean, run_var):
        assert inferred.shape == expected_shape
def vggblock(x, arrays, layer_map, name):
    """Apply one convolution + bias + ReLU stage and register its output.

    Args:
        x: input node of the stage.
        arrays: indexable holding the filter weights at index 0 and the
            bias values at index 1.
        layer_map: dict that receives the stage output under ``name``.
        name: key used to store the output in ``layer_map``.

    Returns:
        The ReLU output node of the stage.
    """
    weights, biases = arrays[0], arrays[1]
    kernel = C.constant(value=weights)
    # The bias is reshaped to (-1, 1, 1) before being added to the
    # convolution output.
    bias_term = C.constant(value=np.reshape(biases, (-1, 1, 1)))
    conv = C.convolution(kernel, x, auto_padding=[False, True, True])
    activation = C.relu(conv + bias_term)
    layer_map[name] = activation
    return activation
예제 #10
0
def _test_eval_plus_two_constants():
    """Evaluate the element-wise sum of two constants and compare values."""
    TOLERANCE_ABSOLUTE = 1E-06

    left = cntk.constant([1., 2., 3., 4.])
    right = cntk.constant([1., 1., 0., 0.])
    result = cntk.eval(cntk.plus(left, right))

    expected = np.asarray([2., 3., 3., 4.])
    assert np.allclose(result, expected, atol=TOLERANCE_ABSOLUTE)
예제 #11
0
def test_floor_division():
    """floor_division on constants must agree with Python's ``//``."""
    dividends = [-3, 1, 2, 3, 4, 5.2]
    divisors = [2, 2, 2, 2, 2, 2]
    numerator = C.constant(dividends)
    denominator = C.constant(divisors)

    # Reference values computed in plain Python: [-2, 0, 1, 1, 2, 2]
    desired = [top // bottom for top, bottom in zip(dividends, divisors)]

    quotient = floor_division(numerator, denominator)
    assert quotient.eval().tolist() == desired
예제 #12
0
def test_gather_op_with_axis(device_id, precision):
    """Gather columns 0 and 2 of a 3x3 constant along axis 1."""
    dtype = PRECISION_TO_TYPE[precision]

    data = np.array([
        [1.0, 1.2, 1.9],
        [2.3, 3.4, 3.9],
        [4.5, 5.7, 5.9],
    ]).astype(dtype)
    # The indices are given in the same floating-point type as the data.
    indices = np.array([0, 2]).astype(dtype)
    expected = np.array([
        [1.0, 1.9],
        [2.3, 3.9],
        [4.5, 5.9],
    ]).astype(dtype)

    gathered = C.gather(C.constant(data), C.constant(indices), axis=1)
    actual = gathered.eval({}, device=cntk_device(device_id))
    assert np.allclose(expected, actual)
예제 #13
0
def test_remainder():
    """remainder on constants must agree with Python's ``%`` operator."""
    dividends = [-3, 1, 2, 3, 4, 3, 5.123]
    divisors = [2, 2, 2, 2, 2, -2, -1.234]
    numerator = C.constant(dividends)
    denominator = C.constant(divisors)

    # Reference values computed in plain Python: [1, 1, 0, 1, 0, -1, ...]
    desired = [top % bottom for top, bottom in zip(dividends, divisors)]

    result = remainder(numerator, denominator).eval().tolist()
    assert pytest.approx(result) == desired
예제 #14
0
def total_variation_loss(x):
    """Return the average of the mean squared neighbour differences of ``x``.

    ``x`` is reshaped with an extra leading axis of size 1 and convolved
    with a ``[-1, 1]`` kernel laid out along the last axis and along the
    second-to-last axis. The result is half the sum of the mean squared
    responses of the two convolutions.
    """
    expanded = C.reshape(x, (1,) + x.shape)
    difference = np.array([-1, 1], dtype=np.float32)

    kernel_last_axis = C.constant(value=difference.reshape(1, 1, 1, 1, 2))
    kernel_prev_axis = C.constant(value=difference.reshape(1, 1, 1, 2, 1))

    diff_last = C.convolution(kernel_last_axis, expanded,
                              auto_padding=[False])
    diff_prev = C.convolution(kernel_prev_axis, expanded,
                              auto_padding=[False])

    energy_prev = C.reduce_mean(C.square(diff_prev))
    energy_last = C.reduce_mean(C.square(diff_last))
    return 0.5 * (energy_prev + energy_last)
예제 #15
0
 def _load_proj(self):
     """Load the projection weights and store a dense+ReLU function.

     Reads ``W_proj`` and ``b_proj`` from the ``CNN_proj`` group of the HDF5
     file at ``self.weight_file``, wraps them as constants, and assigns to
     ``self.proj`` a CNTK function computing ``relu(x * W_proj + b_proj)``.
     """
     with h5py.File(self.weight_file, 'r') as weights_file:
         projection_group = weights_file['CNN_proj']
         W_proj = C.constant(projection_group['W_proj'][...])
         b_proj = C.constant(projection_group['b_proj'][...])

     @C.Function
     def dense(x):
         projected = C.times(x, W_proj)
         return C.relu(projected + b_proj)

     self.proj = dense
예제 #16
0
 def __init__(self):
     """Create the layers, parameters, constants and inputs of the model.

     All sizes are read from ``Config``. The attributes created here are:
     source/target embeddings, two source-side ``RNN.GRUN`` encoders
     (``EncoderL2R``/``EncoderR2L``), a ``RNN.GRUN`` decoder, a set of weight
     parameters, two zero-valued constants, the sparse input variables with
     their mask inputs, and ``self.Parameters``, a flat list of the
     parameter objects.
     """
     # NOTE(review): every parameter below is initialised through
     # Config.defaultInit(); if that initializer depends on creation order
     # (e.g. sequential seeding), reordering these statements would change
     # the initial values -- confirm before restructuring.
     self.EmbSrc = C.layers.Embedding(Config.EmbeddingSize,
                                      init=Config.defaultInit())
     self.EmbTrg = C.layers.Embedding(Config.EmbeddingSize,
                                      init=Config.defaultInit())
     self.EncoderL2R = RNN.GRUN(Config.EmbeddingSize, Config.SrcHiddenSize)
     self.EncoderR2L = RNN.GRUN(Config.EmbeddingSize, Config.SrcHiddenSize)
     # The decoder input size is the embedding size plus twice the source
     # hidden size.
     self.Decoder = RNN.GRUN(
         Config.EmbeddingSize + Config.SrcHiddenSize * 2,
         Config.TrgHiddenSize)
     # Weight of shape (TrgHiddenSize + EmbeddingSize, TrgVocabSize) and its
     # companion Wtb of size TrgVocabSize.
     self.Wt = C.parameter(
         shape=(Config.TrgHiddenSize + Config.EmbeddingSize,
                Config.TrgVocabSize),
         init=Config.defaultInit())
     self.Wtb = C.parameter(shape=(Config.TrgVocabSize),
                            init=Config.defaultInit())
     # Weight of shape (SrcHiddenSize, TrgHiddenSize) and its companion WIb
     # of size TrgHiddenSize.
     self.WI = C.parameter(shape=(Config.SrcHiddenSize,
                                  Config.TrgHiddenSize),
                           init=Config.defaultInit())
     self.WIb = C.parameter(shape=(Config.TrgHiddenSize),
                            init=Config.defaultInit())
     # Was / Wat / Wav: presumably the attention weights (source side,
     # target side, scoring vector) -- TODO confirm against their use.
     self.Was = C.parameter(shape=(Config.SrcHiddenSize * 2,
                                   Config.TrgHiddenSize),
                            init=Config.defaultInit())
     self.Wat = C.parameter(shape=(Config.TrgHiddenSize,
                                   Config.TrgHiddenSize),
                            init=Config.defaultInit())
     self.Wav = C.parameter(shape=(Config.TrgHiddenSize, 1),
                            init=Config.defaultInit())
     # Zero-valued constants.
     self.firstHidden = C.constant(0,
                                   shape=(Config.BatchSize,
                                          Config.SrcHiddenSize))
     self.initTrgEmb = C.constant(0,
                                  shape=(1, Config.BatchSize,
                                         Config.EmbeddingSize))
     # Sparse inputs of shape (MaxLength * BatchSize, VocabSize) for the
     # source and the target side.
     self.inputMatrixSrc = C.input_variable(
         shape=(Config.SrcMaxLength * Config.BatchSize,
                Config.SrcVocabSize),
         is_sparse=True)
     self.inputMatrixTrg = C.input_variable(
         shape=(Config.TrgMaxLength * Config.BatchSize,
                Config.TrgVocabSize),
         is_sparse=True)
     # Mask inputs of shape (MaxLength, BatchSize).
     self.maskMatrixSrc = C.input_variable(shape=(Config.SrcMaxLength,
                                                  Config.BatchSize))
     self.maskMatrixTrg = C.input_variable(shape=(Config.TrgMaxLength,
                                                  Config.BatchSize))
     # Flat list of parameter objects: the embedding tables and the
     # parameters above, followed by those of the encoders and the decoder.
     self.Parameters = [
         self.EmbSrc.E, self.EmbTrg.E, self.Wt, self.Wtb, self.WI, self.WIb,
         self.Was, self.Wat, self.Wav
     ]
     self.Parameters.extend(self.EncoderL2R.Parameters)
     self.Parameters.extend(self.EncoderR2L.Parameters)
     self.Parameters.extend(self.Decoder.Parameters)
예제 #17
0
    def __init__(self, loc, scale_diag):
        """Store a float32 location and diagonal scale, and build the ops.

        Args:
            loc: location vector; converted to a float32 array.
            scale_diag: diagonal entries of the scale; multiplied with an
                identity matrix of matching size to form a square float32
                matrix.

        Sets ``loc``, ``scale``, ``shape`` (shape of ``loc``), ``mvn_pdf``
        and ``mvn_log_prob``. Each op receives its own pair of constants
        named 'loc' and 'scale'.
        """
        location = np.array(loc)
        dimension = location.shape[0]
        scale_matrix = np.array(scale_diag) * np.eye(dimension)

        self.loc = location.astype(np.float32)
        self.scale = scale_matrix.astype(np.float32)
        self.shape = self.loc.shape

        def named_constants():
            return (C.constant(self.loc, name='loc'),
                    C.constant(self.scale, name='scale'))

        self.mvn_pdf = C.mvn_pdf(*named_constants())
        self.mvn_log_prob = C.mvn_log_prob(*named_constants())
예제 #18
0
 def output_layer(self, query, match_context):
     """Build the answer-span output block.

     The computation is expressed on two placeholders of size
     ``2*self.hidden_dim`` and finally wrapped with ``C.as_block`` so that
     the placeholders are bound to ``query`` and ``match_context``.

     Returns:
         A block named 'output_layer' combining the start and end outputs
         produced by ``seq_hardmax``.
     """
     q_processed = C.placeholder(shape=(2*self.hidden_dim,))
     mat_context = C.placeholder(shape=(2*self.hidden_dim,))
     
     # Output layer: pool the question, attend over the context for the
     # start logits, run one GRU step, then attend again for the end logits.
     r_q = question_pooling(q_processed, 2*self.hidden_dim) #shape n*(2*self.hidden_dim)
     p1_logits = attention_weight(mat_context, r_q, 2*self.hidden_dim)
     attention_pool = C.sequence.reduce_sum(p1_logits * mat_context)
     state = C.layers.GRU(2*self.hidden_dim)(attention_pool, r_q)
     p2_logits = attention_weight(mat_context, state, 2*self.hidden_dim)
     
     @C.Function
     def start_ave_point(p1_logits, p2_logits, point):
         @C.Function
         def start_ave(last, now):
             # `+ last - last` leaves the value of `now` unchanged;
             # presumably it only ties `last` into the expression -- confirm.
             now = now + last - last
             # NOTE(review): `point` is assigned two lines below, which makes
             # it a local name of start_ave; reading it here would raise
             # UnboundLocalError when this body runs -- confirm and fix.
             new_start = now * C.sequence.gather(p2_logits, point)
             point = C.sequence.future_value(point)
             return new_start
         start_logits_ave = C.layers.Recurrence(start_ave)(p1_logits)
         return start_logits_ave
     # Mark the first step, accumulate the marker with two stacked forward
     # recurrences of C.plus, then compare the result against 16.
     point = C.sequence.is_first(p1_logits)
     point = C.layers.Sequential([For(range(2), lambda: C.layers.Recurrence(C.plus))])(point)
     point = C.greater(C.constant(16), point)
     start_logits_ave = start_ave_point(p1_logits, p2_logits, point)
     
     @C.Function
     def end_ave_point(p1_logits, p2_logits, point):
         @C.Function
         def end_ave(last, now):
             # Same pattern as start_ave above.
             now = now + last - last
             # NOTE(review): same local-name hazard for `point` as in
             # start_ave -- confirm and fix.
             new_end = now * C.sequence.gather(p2_logits, point)
             point = C.sequence.past_value(point)
             return new_end
         end_logits_ave = C.layers.Recurrence(end_ave, go_backwards=True)(p2_logits)
         return end_logits_ave
     # Mirror of the start computation: mark the last step and accumulate
     # with backward recurrences.
     point = C.sequence.is_last(p1_logits)
     point = C.layers.Sequential([For(range(2), lambda: C.layers.Recurrence(C.plus, go_backwards=True))])(point)
     point = C.greater(C.constant(16),point)
     end_logits_ave = end_ave_point(p1_logits, p2_logits, point)
     
     start_logits = seq_hardmax(start_logits_ave)
     end_logits = seq_hardmax(end_logits_ave)
     # The bare string below is a disabled alternative (hardmax applied
     # directly to the raw logits); it is an expression with no effect.
     '''
     start_logits = seq_hardmax(p1_logits)
     end_logits = seq_hardmax(p2_logits)
     '''
     # Bind the placeholders to the real inputs and expose both outputs.
     return C.as_block(
         C.combine([start_logits, end_logits]),
         [(q_processed, query), (mat_context, match_context)],
         'output_layer',
         'output_layer')
예제 #19
0
파일: reshaper.py 프로젝트: roya0045/cvar2
    def var(array,W=_W,B=None,square=0,sqrt=0,V=False,sizz=0):
        """Compute a weighted, mean-centred reduction of ``array`` under ``W``.

        Steps, as written: multiply ``array`` by ``W``; reduce over the last
        two axes to get a mean; subtract the mean (optionally squaring the
        difference); add the bias ``B``; sum over axes (-2, -1, -3);
        optionally take the square root; finally reshape to the first four
        dimensions and swap axes 3 and 1.

        Args:
            array: input operand multiplied element-wise with ``W``.
            W: weights; defaults to the module-level ``_W``.
            B: optional bias; a float32 zero constant of shape
                ``W.shape[0:2]`` is used when None.
            square: when truthy, square the mean-centred values.
            sqrt: when truthy, apply a square root to the reduced output.
            V: verbosity; truthy values print intermediate values, ``2``
                additionally prints ``i`` and ``di``.
            sizz: when equal to 1 the mean divides by the sum of ``W``;
                otherwise by the product of the last two dimensions of ``W``.

        Returns:
            The reduced output; its shape is asserted to be
            ``(array.shape[0], W.shape[0], array.shape[1], array.shape[2])``.
        """
        #W=tf.transpose(W, [0,2,3,1])
        
        arrs=array.shape
        ashp=W.shape
        sb=(W.shape[1],1,1)
        WV=W.shape[-2:]
        xi=(-2,-1)  # axes reduced for the mean and for `size`
        x2=(-2,-1,-3)  # axes reduced for the final sum

        if V:
            print(W.eval())
            print(arrs,ashp)
        mul=(array*W)

        if V:
            print('Wsamp',W[-1,-1].eval())
            print('array*w',(mul.eval())[0,-1])

        size=C.reduce_sum(W,axis=xi)#shape=(outputs, channel)

        if V:
            print("sizesamp",size.shape,size.eval())
        if B is None:
            B=C.constant(0,shape=W.shape[0:2],dtype=np.float32)#channel
        # Append singleton axes to B until it has as many axes as W.
        B=C.reshape(B,(*B.shape,*[1 for _ in range(len(ashp)-len(B.shape))]))
        if sizz==1:
            mean=C.reduce_sum(mul,axis=xi)/size
        else:
            mean=C.reduce_sum(mul,axis=xi)/C.constant(value=WV[0]*WV[1],shape=sb,dtype=np.float32)
        if V:
            print("meansamp",mean.eval()[0,-1])
        if square:
            i=(C.square(mul-mean)+B)
        else:
            i=(((mul)-mean)+B)
        # `di` is only used by the V==2 debug output below.
        di=i/size
        if V==2:
            print("i",i.eval(),"i")
            print("di",di.eval(),"di")
        if V:
            print('isamp',i.shape,i.eval()[-1,-1,])
        # NOTE(review): B was already added when `i` was built, so it is
        # added a second time here -- confirm this is intended.
        out=C.reduce_sum(i+B,axis=x2)
        #out=np.rollaxis(np.sum(i+B,axis=x2),-1,1)
        # NOTE(review): this print and the one further down run regardless
        # of V.
        print(out.shape)
        if sqrt:
            out=C.sqrt(out)
        out=C.swapaxes(C.reshape(out,out.shape[:4]), 3, 1)
        print(out.shape)
        assert out.shape==(arrs[0],ashp[0],arrs[1],arrs[2])
        return(out)
예제 #20
0
def test_nce_backward_indices(classes, xdim, batch, expected_value, device_id,
                              precision):
    """
    Simple test that makes sure that the derivatives have the correct sparsity pattern

    A baseline is built by counting, over a fixed number of trials, which
    classes are drawn by ``C.random_sample`` with a fixed seed. The same
    count is then taken from the non-zero entries of the gradient of
    ``nce_loss`` with respect to the bias ``b``. For every class the two
    counts must agree, unless the class is one of the target indices and
    received a gradient in every trial.

    ``expected_value``, ``device_id`` and ``precision`` are part of the test
    signature but are not used by the body.
    """
    # Import the submodule explicitly: a bare ``import scipy`` does not
    # guarantee that ``scipy.sparse`` has been loaded.
    import scipy.sparse

    # ignore precision, only sparsity pattern matters for this test
    dt = np.float32
    trials = 10

    # Establish baseline
    expected_count = np.zeros(classes)
    I = C.constant(np.eye(classes, dtype=dt))
    q = np.arange(classes, dtype=dt) + 1
    z = C.reduce_sum(C.times(C.random_sample(q, 32, True, seed=98052), I),
                     axis=0)
    for _ in range(trials):
        expected_count[np.nonzero(z.eval().ravel())] += 1

    # Set things up to measure the same thing with nce_loss

    x = C.input_variable(xdim, needs_gradient=True)
    y = C.input_variable(classes, is_sparse=True)

    x0 = np.arange(batch * xdim, dtype=dt).reshape(
        (batch, xdim)) / (batch * xdim)
    # One target class per sample (10, 20, ...), encoded as a CSR matrix
    # with a single 1 in each row.
    data = np.ones(batch, dtype=dt)
    indices = list(range(10, 10 * batch + 1, 10))
    indptr = list(range(batch + 1))
    y0 = scipy.sparse.csr_matrix((data, indices, indptr),
                                 shape=(batch, classes))

    b = C.parameter((classes, 1))
    W = C.parameter((classes, C.InferredDimension))

    # The gradients are requested with as_numpy=False; they are routed
    # through a times-by-identity node so that they can be read back by
    # evaluating that node.
    gb = np.zeros(classes)
    vb = C.input_variable((classes, 1), dtype=dt)
    Ib = C.constant(np.eye(1, dtype=dt))
    zb = C.times(vb, Ib)

    loss = C.nce_loss(W, b, x, y, q, seed=98052)
    for _ in range(trials):
        v = loss.grad({x: x0, y: y0}, wrt=loss.parameters, as_numpy=False)
        gb[np.nonzero(zb.eval({vb: v[b]}).ravel())] += 1
    for i in range(classes):
        assert gb[i] == expected_count[i] or (i in indices and gb[i] == trials)
예제 #21
0
def test_override_serialize(tmpdir):
    """A nested user function must survive a save/load round trip.

    Three levels of ``MyPlusPlus`` are stacked, wrapped as a user function,
    evaluated, saved, reloaded, and evaluated again; both results must be
    equal.
    """
    dev = C.cpu()
    a, b = 1.2322341, -0.29084

    op = MyPlusPlus([C.constant(a), C.constant(b)], '++')
    for level_name in ('+++', '++++'):
        op = MyPlusPlus([op, op], level_name)
    op = C.user_function(op)
    result1 = op.eval({}, device=dev)

    filepath = str(tmpdir / 'test_udf_with_renamed_deserialize.dat')
    op.save(filepath)

    op_reloaded = Function.load(filepath, device=dev)
    result2 = op_reloaded.eval({}, device=dev)

    assert result1 == result2
예제 #22
0
def test_override_serialize(tmpdir):
    """Save and reload a stacked user-defined function and compare outputs."""
    dev = C.cpu()
    a, b = 1.2322341, -0.29084

    # Innermost node takes the two constants; each further level feeds the
    # previous node into both inputs.
    innermost = MyPlusPlus([C.constant(a), C.constant(b)], '++')
    middle = MyPlusPlus([innermost, innermost], '+++')
    outermost = MyPlusPlus([middle, middle], '++++')
    op = C.user_function(outermost)
    result1 = op.eval({}, device=dev)

    filepath = str(tmpdir / 'test_udf_with_renamed_deserialize.dat')
    op.save(filepath)

    op_reloaded = Function.load(filepath, device=dev)

    assert result1 == op_reloaded.eval({}, device=dev)
def _simple_dict():
    """Build a small graph and return its nodes keyed by short names.

    The graph is ``times(plus(i1, c1), p1)``; it is stored under both 'op2'
    and 'root'. 'all' combines the root with ``minus(label, 1)``.
    """
    i1 = C.input_variable(shape=(2, 3), name='i1')
    c1 = C.constant(shape=(2, 3), value=6, name='c1')
    p1 = C.parameter(shape=(3, 2), init=7, name='p1')
    op1 = C.plus(i1, c1, name='op1')
    op2 = C.times(op1, p1, name='op2')

    target = C.input_variable((), name='label')
    label_minus_one = C.minus(target, C.constant(1, name='c2'), name='minus')
    combined = C.combine([op2, label_minus_one], name='all')

    return {
        'i1': i1,
        'c1': c1,
        'p1': p1,
        'op1': op1,
        'op2': op2,
        'root': op2,
        'target': target,
        'all': combined,
    }
예제 #24
0
def _simple_dict():
    """Return a dict of named nodes forming a tiny test graph.

    Keys: 'i1', 'c1', 'p1' (leaves), 'op1', 'op2' (operations), 'root'
    (same node as 'op2'), 'target' (scalar input named 'label') and 'all'
    (root combined with ``target - 1``).
    """
    nodes = {
        'i1': C.input_variable(shape=(2, 3), name='i1'),
        'c1': C.constant(shape=(2, 3), value=6, name='c1'),
        'p1': C.parameter(shape=(3, 2), init=7, name='p1'),
    }
    nodes['op1'] = C.plus(nodes['i1'], nodes['c1'], name='op1')
    nodes['op2'] = C.times(nodes['op1'], nodes['p1'], name='op2')
    nodes['root'] = nodes['op2']

    nodes['target'] = C.input_variable((), name='label')
    shifted_target = C.minus(
        nodes['target'], C.constant(1, name='c2'), name='minus')
    nodes['all'] = C.combine([nodes['root'], shifted_target], name='all')

    return nodes
예제 #25
0
def test_convolution_attributes():
    """Check the attribute dictionary reported by a convolution node.

    The same kernel and input are convolved twice with different
    ``auto_padding`` arguments; only the reported 'autoPadding' entry is
    expected to differ between the two cases.
    """
    x = C.input_variable((1, 5, 5))
    kernel_values = np.reshape(
        np.array([2, -1, -1, 2], dtype=np.float32), (1, 2, 2))
    kernel = C.constant(value=kernel_values)

    def expected_attributes(reported_padding):
        return {
            'autoPadding': reported_padding,
            'sharing': [True, True, True],
            'strides': (1, 1, 1),
            'maxTempMemSizeInSamples': 0,
            'upperPad': (0, 0, 0),
            'lowerPad': (0, 0, 0),
            'transpose': False,
            'outputShape': (0,),
        }

    # (auto_padding passed in, autoPadding expected in the attributes)
    cases = (
        ([False], [False, False, False]),
        ([False, True], [False, False, True]),
    )
    for requested_padding, reported_padding in cases:
        f = C.convolution(kernel, x, auto_padding=requested_padding)
        d = f.root_function.attributes
        _check(expected_attributes(reported_padding), d)
예제 #26
0
 def multiFunc(self, arg1):
     """Build a multi-bit binarized approximation of the input.

     A fresh input with the shape and dynamic axes of ``arg1`` is created.
     For each bit level up to the maximum found in ``self.bit_map``, the
     remaining residual is binarized to +/-1 where the bit map asks for more
     than that many bits, scaled by a mean of the absolute residual, and
     accumulated into the approximation.

     Returns:
         A tuple ``(approx, multiIn)``: the approximation graph and the
         input it was built on.
     """
     # load or create the inputs we need
     multiIn = C.input(shape=arg1.shape, dynamic_axes = arg1.dynamic_axes)
     bit_map = C.constant(self.bit_map)
     max_bits = self.bit_map.max()
     # NOTE(review): `shape` and `reformed` are not used later in this
     # method.
     shape = multiIn.shape
     reformed = C.reshape(multiIn, (-1,))
     # lets compute the means we need
     # carry over represents the remaining value that needs to binarized. For a single bit, this is just the input. For more bits,
     # it is the difference between the previous bits approximation and the true value.
     carry_over = multiIn
     # Multiplying by 0 yields an all-zero starting value shaped like the input.
     approx = C.element_times(multiIn, 0)
     # iterate through the maximum number of bits specified by the bit maps, basically compute each level of binarization
     for i in range(max_bits):
         # determine which values of the input should be binarized to i bits or more
         hot_vals = C.greater(bit_map, i)
         # select only the values which we need to binarize
         valid_vals = C.element_select(hot_vals, carry_over, 0)
         # compute mean on a per kernel basis, reshaping is done to allow for sum reduction along only axis 0 (the kernels)
         mean = C.element_divide(C.reduce_sum(C.reshape(C.abs(valid_vals), (valid_vals.shape[0], -1)), axis=1), C.reduce_sum(C.reshape(hot_vals, (hot_vals.shape[0], -1)), axis=1))
         # reshape the mean to match the dimensionality of the input
         # (assumes a 4-axis input -- TODO confirm)
         mean = C.reshape(mean, (mean.shape[0], mean.shape[1], 1, 1))
         # binarize the carry over
         bits = C.greater(carry_over, 0)
         # Map the 0/1 result of `greater` to -1/+1 ...
         bits = C.element_select(bits, bits, -1)
         # ... and zero out the positions not selected at this bit level.
         bits = C.element_select(hot_vals, bits, 0)
         # add in the equivalent binary representation to the approximation
         approx = C.plus(approx, C.element_times(mean, bits))
         # compute the new carry over
         carry_over = C.plus(C.element_times(C.element_times(-1, bits), mean), carry_over)
         
     return approx, multiIn
예제 #27
0
파일: utils_test.py 프로젝트: delpart/CNTK
def test_get_data_type():
    """Check the dtype that ``get_data_type`` reports for mixed arguments.

    Covers parameters, placeholders, NumPy arrays and plain scalars. A
    variable's own type wins over supplied data, placeholders alone yield
    None, and non-float64 scalars map to float32.
    """
    pa32 = C.parameter(init=np.asarray(2, dtype=np.float32))
    pa64 = C.parameter(init=np.asarray(2, dtype=np.float64))
    pl = C.placeholder(shape=(2))
    c = C.constant(value=3.0)
    n32 = AA(1, dtype=np.float32)
    n64 = AA(1, dtype=np.float64)

    def check_all(cases):
        for args, expected in cases:
            assert get_data_type(*args) == expected

    check_all([
        ((pa32,), np.float32),
        ((pa32, n32), np.float32),
        ((n32, n32), np.float32),
        ((n32, n64), np.float64),
        ((pl, n64), np.float64),
        ((pl, n32), np.float32),
    ])

    assert get_data_type(pl, pl) is None

    check_all([
        # variable's type shall take precedence over provided data
        ((pa32, n64), np.float32),
        ((pa64, n64), np.float64),
        ((pa32, pl, n64), np.float32),
        ((pa64, pl, n64), np.float64),
        # plain scalars
        ((np.float64(1),), np.float64),
        ((np.float32(1),), np.float32),
        ((np.int64(1),), np.float32),  # special case for cntk
        ((1,), np.float32),
        ((1.0,), np.float32),
    ])
예제 #28
0
def test_proposal_layer():
    """Compare the CNTK proposal layer against the Caffe implementation.

    Both layers receive the same random class probabilities and box
    predictions; the resulting proposals must have the same shape and be
    exactly equal.
    """
    cls_prob_shape_cntk = (18, 61, 61)
    cls_prob_shape_caffe = (18, 61, 61)
    rpn_bbox_shape = (36, 61, 61)
    im_info = [1000, 1000, 1]

    # Random input tensors shared by both implementations.
    cls_prob = np.random.random_sample(cls_prob_shape_cntk).astype(np.float32)
    rpn_bbox_pred = np.random.random_sample(rpn_bbox_shape).astype(np.float32)

    # CNTK side: wrap the layer as a user function and run forward.
    cls_prob_var = input_variable(cls_prob_shape_cntk)
    rpn_bbox_var = input_variable(rpn_bbox_shape)
    proposal_layer = CntkProposalLayer(
        cls_prob_var, rpn_bbox_var, cntk.constant(im_info, (3,)))
    cntk_layer = user_function(proposal_layer)
    feed = {cls_prob_var: [cls_prob], rpn_bbox_var: [rpn_bbox_pred]}
    _, cntk_output = cntk_layer.forward(feed)
    first_output_key = next(iter(cntk_output))
    cntk_proposals = cntk_output[first_output_key][0]

    # Caffe side: same data, with a leading batch axis added to each blob.
    cls_prob_caffe = cls_prob.reshape(cls_prob_shape_caffe)
    bottom = [
        np.array([cls_prob_caffe]),
        np.array([rpn_bbox_pred]),
        np.array([im_info]),
    ]
    top = None  # handled through return statement in caffe layer for unit testing

    caffe_layer = CaffeProposalLayer()
    caffe_layer.set_param_str("'feat_stride': 16")
    caffe_layer.setup(bottom, top)
    caffe_output = caffe_layer.forward(bottom, top)
    # The first column of the Caffe output is dropped before comparing.
    caffe_proposals = caffe_output[:, 1:]

    # The results must be exactly the same (zero tolerance).
    assert cntk_proposals.shape == caffe_proposals.shape
    assert np.allclose(cntk_proposals, caffe_proposals, rtol=0.0, atol=0.0)
    print("Verified ProposalLayer")
예제 #29
0
def test_constant_data_type_mismatch():
    """Evaluating a constant times an input fed float32 data must raise.

    The constant is built from ``np.ones``/``np.triu`` without an explicit
    dtype, while the data fed to the input is float32; evaluation is
    expected to raise ValueError.
    """
    upper_triangle = np.triu(np.ones(5))
    a = C.constant(upper_triangle, shape=(5, 5))
    i = C.input_variable(shape=(5, 5))
    product = a * i

    with pytest.raises(ValueError):
        sample = np.asarray(np.random.rand(5, 5), dtype=np.float32)
        product.eval({i: [[sample]]})
예제 #30
0
def cumsum(x, axis=-1):
    """Cumulative sum along the last axis of ``x``.

    Implemented as a product with an upper-triangular matrix of ones, so
    only the last axis is supported.

    Raises:
        ValueError: if ``axis`` is neither -1 nor the index of the last
            axis of ``x``.
    """
    is_last_axis = axis == -1 or axis == K.ndim(x) - 1
    if not is_last_axis:
        raise ValueError('Only the last axis could be used, found: {}'.format(axis))

    size = x.shape[-1]
    upper_ones = np.triu(np.ones((size, size))).astype(x.dtype)
    return C.times(x, C.constant(upper_ones))
예제 #31
0
    def scale_dot_product_attention_block(self, contextQ, contextV, contextK,
                                          name):
        """Build a scaled dot-product attention block.

        Query, value and key are each projected by a Dense(100) layer. The
        scores ``Q . K^T / sqrt(100)`` are masked with the key sequence mask
        (masked positions are set to -1e+30), passed through softmax, and
        used to weight the unpacked values.

        Returns:
            A block named ``'sdp_attention_block' + name`` whose three
            placeholders are bound to ``contextQ``, ``contextV`` and
            ``contextK``.
        """
        projection_dim = 100

        def new_placeholder():
            return C.placeholder(shape=(2 * self.hidden_dim, ),
                                 dynamic_axes=[self.b_axis, self.q_axis])

        Q = new_placeholder()
        V = new_placeholder()
        K = new_placeholder()

        query_proj = C.layers.Dense(projection_dim)(Q)
        value_proj = C.layers.Dense(projection_dim)(V)
        key_proj = C.layers.Dense(projection_dim)(K)

        keys, key_mask = C.sequence.unpack(key_proj, padding_value=0).outputs
        values, _ = C.sequence.unpack(value_proj, padding_value=0).outputs
        keys_transposed = C.swapaxes(keys)

        scaled = C.times(query_proj, keys_transposed) / math.sqrt(projection_dim)
        scores = C.reshape(scaled, -1)
        mask = C.sequence.broadcast_as(key_mask, query_proj)
        # Padded key positions get a very large negative score so that they
        # receive practically no weight after the softmax.
        weights = C.softmax(
            C.element_select(mask, scores, C.constant(-1e+30)))
        attended = C.times(weights, values)

        block_name = 'sdp_attention_block' + name
        bindings = [(Q, contextQ), (V, contextV), (K, contextK)]
        return C.as_block(attended, bindings, block_name, block_name)
예제 #32
0
    def attention(encoded, network):
        """Attend over ``encoded`` with a Gaussian-window weighting.

        The window coefficients are produced from ``network`` through the
        enclosing scope's ``dense`` layer, evaluated at every position of
        ``encoded``, masked at padded positions, and used for a weighted sum
        over the unpacked ``encoded`` sequence.

        The shape annotations in the comments below are the original
        author's notes.

        Returns:
            The squeezed weighted sum, named "GaussianWindowAttention".
        """
        abk = dense(network)
        a, b, k = gaussian_windows_attention_coefficients(abk, nb_mixtures)
        # print("abk shape:", a.shape, b.shape, k.shape)
        # a, b, k: [#, n] [nb_mixture, 1]
        # context: [#, c] [char_ohe]

        encoded_unpacked = C.sequence.unpack(encoded, padding_value=0, no_mask_output=True)
        # context_unpacked: [#] [*=c, char_ohe]
        u = Cx.sequence.position(encoded)  # position gives shape=(1, )
        # u: [#, c], [1]
        # Padded positions are filled with a large sentinel value.
        u_values, u_valid = C.sequence.unpack(u, padding_value=999_999).outputs
        # u_values: [#] [*=c, 1]
        # u_valid: [#] [*=c]
        u_values_broadcast = C.swapaxes(C.sequence.broadcast_as(u_values, k))
        # u_values_broadcast: [#, n] [1, *=c]
        u_valid_broadcast = C.sequence.broadcast_as(C.reshape(u_valid, (1,), 1), k)
        # u_valid_broadcast: [#, n] [*=c, 1] ~ shape verified correct at this point

        # print("u_values_broadcast shape:", u_values_broadcast.shape)
        # print("abk shape:", a.shape, b.shape, k.shape)
        phi = window_weight(a, b, k, u_values_broadcast)
        # phi: [#, n] [*=c, 1]
        # Zero the window weights at padded positions.
        zero = C.constant(0)
        phi = C.element_select(u_valid_broadcast, phi, zero, name="phi")
        # phi: [#, n] [*=c, 1]
        attended = C.reduce_sum(phi * C.sequence.broadcast_as(encoded_unpacked, phi), axis=0)
        # [#, n] [1, char_ohe]
        # print("attended_context shape:", attended_context.shape)
        output = C.squeeze(attended, name="GaussianWindowAttention")
        # [#, n] [char_ohe]
        return output
예제 #33
0
def _graph_dict():
    """Build a graph whose only purpose is to be traversed by tests.

    Returns a dict mapping short keys to the created nodes. The graph
    contains an unnamed parameter ('p2') and two operations sharing the
    name 'op3' ('op3a' and 'op3b'); 'root' is the same node as 'first'.
    """
    i1 = C.sequence.input_variable(
        shape=(2, 3), sequence_axis=Axis('ia'), name='i1')
    c1 = C.constant(shape=(2, 3), value=6, name='c1')
    p1 = C.parameter(shape=(3, 2), init=7, name='p1')

    op1 = C.plus(i1, c1, name='op1')
    op2 = C.times(op1, p1, name='op2')

    # Deliberately created without a name.
    p2 = C.parameter(shape=(2, 2))

    # Deliberately given the same name.
    op3a = C.plus(op2, p2, name='op3')
    op3b = C.plus(op3a, p2, name='op3')

    first = C.sequence.first(op3b, name='past')

    return {
        'i1': i1,
        'c1': c1,
        'p1': p1,
        'op1': op1,
        'op2': op2,
        'p2': p2,
        'op3a': op3a,
        'op3b': op3b,
        'first': first,
        'root': first,
    }
예제 #34
0
def test_convolution_attributes():
    """Check the attribute dict of a convolution for two ``auto_padding`` inputs."""
    x = C.input((1, 5, 5))
    weights = np.reshape(np.array([2, -1, -1, 2], dtype=np.float32), (1, 2, 2))
    kernel = C.constant(value=weights)

    # (auto_padding passed to the op, 'autoPadding' attribute expected back)
    cases = (
        ([False], [False, False, False]),
        ([False, True], [False, False, True]),
    )
    for requested, reported in cases:
        conv = C.convolution(kernel, x, auto_padding=requested)
        actual = conv.root_function.attributes
        expected = {
            'autoPadding': reported,
            'sharing': [True, True, True],
            'strides': (1, 1, 1),
            'maxTempMemSizeInSamples': 0,
            'upperPad': (0, 0, 0),
            'lowerPad': (0, 0, 0),
            'transpose': False,
            'outputShape': (0, )
        }
        _check(expected, actual)
예제 #35
0
def test_Gather(tmpdir):
    """Build a gather over a constant 6x2 table and pass it to ``verify_one_input``."""
    index_data = np.asarray([[[0], [1]], [[4], [5]]]).astype('f')
    x = C.input_variable((2, 1))
    table = C.constant(np.arange(12).reshape(6, 2).astype('f'))
    verify_one_input(C.gather(table, x), index_data, tmpdir, 'Gather_1')
예제 #36
0
    def embed(self):
        """Build the fixed embedding table and return the embedding function.

        Rows of the table are indexed by ``self.vocab[word]``.  Columns
        ``[:300]`` are filled from the GloVe text file and columns ``[300:]``
        with the per-word average of the matching dataset in the ELMo HDF5
        file.  If filling a row fails, its ELMo part falls back to the
        average of the ``'<UNK>'`` dataset.

        Returns:
            A function ``func(wg, wn)`` computing
            ``C.times(wg, glove) + C.times(wn, nonglove)``, where ``glove`` is
            the constant table built here and ``nonglove`` is a trainable
            parameter of shape ``(self.wn_dim, 1024 + 300)``.
        """
        npglove = np.zeros((self.wg_dim, 1024 + 300), dtype=np.float32)
        elmo_path = os.path.join(self.abs_path, '../data/elmo_embedding.bin')
        glove_path = os.path.join(self.abs_path, '../data/glove.840B.300d.txt')

        # Both files are needed only while the table is being filled, so close
        # them deterministically instead of leaking the HDF5 handle.
        with h5py.File(elmo_path, 'r') as hf, \
                open(glove_path, encoding='utf-8') as f:
            for line in f:
                parts = line.split()
                if not parts:
                    # A blank line has no token to look up.
                    continue
                word = parts[0].lower()
                if word not in self.vocab:
                    continue
                row = self.vocab[word]
                try:
                    # Only lines of exactly "<token> + 300 floats" are used.
                    if len(parts) == 301:
                        npglove[row, :300] = np.asarray(
                            [float(p) for p in parts[-300:]])
                        npglove[row, 300:] = np.average(hf[word][:], axis=0)
                except Exception:
                    # Best-effort fallback, but let KeyboardInterrupt and
                    # SystemExit propagate (a bare ``except`` would not).
                    npglove[row, 300:] = np.average(hf['<UNK>'][:], axis=0)

        glove = C.constant(npglove)
        nonglove = C.parameter(shape=(self.wn_dim, 1024 + 300),
                               init=C.glorot_uniform(),
                               name='TrainableE')

        def func(wg, wn):
            return C.times(wg, glove) + C.times(wn, nonglove)

        return func
예제 #37
0
def test_ConvTranspose(tmpdir, dtype, device_id):
    """Build a strided ``convolution_transpose`` model and pass it to ``verify_one_input``.

    Skipped when float16 data would have to run on the CPU (``device_id == -1``).
    """
    fp16_on_cpu = device_id == -1 and dtype == np.float16
    if fp16_on_cpu:
        pytest.skip('Test is skipped on CPU with float16 data')

    device = cntk_device(device_id)
    with C.default_options(dtype=dtype):
        # Keep the shapes below as they are, because this tests an earlier bug.
        input_shape = (48, 16, 16)
        element_count = np.prod(input_shape)
        img = np.reshape(np.arange(element_count, dtype=dtype), input_shape)

        x = C.input_variable(input_shape)

        # For convolution_transpose the shape is (I x O x W x H)
        kernel_shape = (48, 32, 3, 3)
        ones = np.ones(shape=(kernel_shape), dtype=dtype)
        kernel = C.constant(value=ones)

        conv_trans_model = C.convolution_transpose(
            kernel, x,
            strides=(2, 2),
            output_shape=(32, 32, 32),
            auto_padding=[False, True, True],
        )

        verify_one_input(conv_trans_model, img, tmpdir, 'ConvTranspose_0', device)
예제 #38
0
def test_constant_data_type_mismatch():
    """Evaluating a product of a default-dtype numpy constant with float32 data must raise ``ValueError``."""
    upper = C.constant(np.triu(np.ones(5)), shape=(5, 5))
    i = C.input_variable(shape=(5, 5))
    product = upper * i

    with pytest.raises(ValueError):
        sample = np.asarray(np.random.rand(5, 5), dtype=np.float32)
        product.eval({i: [[sample]]})
예제 #39
0
파일: attention.py 프로젝트: haixpham/cntkx
    def attention(query, key, value):
        """Attend ``query`` over the unpacked ``key``/``value`` sequences.

        ``obey_sequence_order`` and ``max_seq_len`` are read from the
        enclosing scope.

        Raises:
            ValueError: if ``obey_sequence_order`` is set but ``max_seq_len``
                is not.
        """
        # cannot use sequence.last here, it would conflict with recurrence
        dk = C.reduce_sum(C.ones_like(query))
        # dk: [#, *] [1, ] and value = int(dim_of_query)

        keys_dense = C.sequence.unpack(key, padding_value=0, no_mask_output=True)  # [#] [-3, key_dim]
        values_dense = C.sequence.unpack(value, padding_value=0, no_mask_output=True)  # [#] [-3, value_dim]

        keys_per_step = C.sequence.broadcast_as(keys_dense, query)  # [#, *] [-3, key_dim]
        # [#, *] [q_dim] @ [#, *] [key_dim, -3], assert q_dim == key_dim
        scaled = C.times_transpose(query, keys_per_step) / dk
        # scaled: [#, *] [-3, ] => one score per element of the key sequence

        # mask out invalid temporal connections to obey the sequence order
        if obey_sequence_order:
            if not max_seq_len:
                raise ValueError("max_seq_len must be defined when obey_sequence_order is True")

            scores, _scores_mask = C.sequence.unpack(scaled, padding_value=0).outputs
            # scores: [#] [-3, -3]  <== matrix will be zero-ed above the diagonal
            # _scores_mask: [#] [-3,]  (not used)

            minus_inf = C.constant(-1e+30)
            lower_triangle = np.tril(np.ones((max_seq_len, max_seq_len)), k=0)
            allowed = C.Constant(lower_triangle)  # [] [max_seq, max_seq]
            allowed = C.reconcile_dynamic_axes(allowed, scores)  # [#] [max_seq, max_seq]
            allowed = C.crop_manual(allowed, scores, 0, 0)  # [#] [-3, -3]
            scores = C.element_select(allowed, scores, minus_inf)  # [#] [-3, -3]
            scaled = C.to_sequence_like(scores, query)  # [#, *] [-3]

        weights = C.softmax(scaled, axis=-1)
        values_per_step = C.sequence.broadcast_as(values_dense, query)
        return C.times(weights, values_per_step)  # [#, *] [value_dim,]
def _graph_dict():
    """Return the nodes of a meaningless sample graph, keyed by label.

    The graph only exists so that traversal code has something to visit.
    It includes a parameter without a name and two ``plus`` nodes that are
    both named ``'op3'``.
    """
    i1 = C.sequence.input_variable(shape=(2, 3),
                                   sequence_axis=Axis('ia'),
                                   name='i1')
    c1 = C.constant(shape=(2, 3), value=6, name='c1')
    p1 = C.parameter(shape=(3, 2), init=7, name='p1')

    op1 = C.plus(i1, c1, name='op1')
    op2 = C.times(op1, p1, name='op2')

    # no name
    p2 = C.parameter(shape=(2, 2))

    # duplicate names
    op3a = C.plus(op2, p2, name='op3')
    op3b = C.plus(op3a, p2, name='op3')

    first = C.sequence.first(op3b, name='past')

    # 'root' is an alias for the final node.
    return dict(i1=i1, c1=c1, p1=p1,
                op1=op1, op2=op2,
                p2=p2,
                op3a=op3a, op3b=op3b,
                first=first, root=first)
예제 #41
0
def _to_dense(val, is_sequence=False):
    """Densify the sparse value ``val`` by multiplying it with an identity matrix.

    Args:
        val: sparse value exposing ``shape`` and ``device``.
        is_sequence: when true, feed ``val`` through a sequence input and
            drop two leading axes from its shape instead of one.

    Returns:
        The result of evaluating the product on ``val.device``.
    """
    make_input = C.sequence.input_variable if is_sequence else C.input_variable
    sample_shape = val.shape[2:] if is_sequence else val.shape[1:]
    x = make_input(sample_shape, is_sparse=True)

    identity = C.constant(value=np.eye(val.shape[-1], dtype=np.float32))
    product = C.times(x, identity)
    return product.eval({x : val}, device=val.device)
예제 #42
0
def build_test_function():
    """Build ``MyPlus(NativeUserTimesOp(w, MyPlus(x, c1)), c2)`` on the CPU.

    Registers the native ``NativeUserTimesOp`` user function first if it is
    not registered yet.

    Returns:
        Tuple ``(device, w_value, c1_value, c2_value, function)``.
    """
    dev = C.cpu()
    w_value = np.asarray([[0.5, 2], [-0.5, 1.5]]).astype(np.float32)
    c1_value, c2_value = 2.718, -3.141

    native_op = 'NativeUserTimesOp'
    if not C.cntk_py.is_native_user_function_registered(native_op):
        library = 'Cntk.ExtensibilityExamples-' + C.__version__.rstrip('+')
        C.ops.register_native_user_function(native_op, library, 'CreateUserTimesFunction')

    x = C.input_variable(2)
    w = C.parameter((2, 2), init=w_value, device=dev)

    shifted = C.user_function(MyPlus(x, C.constant(c1_value)))
    multiplied = C.ops.native_user_function(native_op, [w, shifted],
                                            user_function_instance_name='my_times')
    result = C.user_function(MyPlus(multiplied, C.constant(c2_value)))

    return dev, w_value, c1_value, c2_value, result
예제 #43
0
def test_nce_backward_indices(classes, xdim, batch, expected_value, device_id, precision):
    """
    Simple test that makes sure that the derivatives have the correct sparsity pattern

    A baseline count of which classes get sampled is collected from
    ``C.random_sample`` over ``trials`` evaluations.  The same count is then
    collected from the non-zero entries of the bias gradient of
    ``C.nce_loss`` built with the same seed.  For every class the two
    counts must match, unless the class is one of the label indices and was
    hit in every trial.
    """

    # ignore precision, only sparsity pattern matters for this test
    dt = np.float32

    from cntk.losses import nce_loss
    # Import the submodule explicitly: a bare ``import scipy`` does not
    # guarantee that ``scipy.sparse`` is available as an attribute.
    import scipy.sparse
    trials = 10

    # Establish baseline
    expected_count = np.zeros(classes)
    I = C.constant(np.eye(classes, dtype=dt))
    q = np.arange(classes, dtype=dt) + 1
    z = C.reduce_sum(C.times(C.random_sample(q, 32, True, seed=98052), I), axis=0)
    for _ in range(trials):
        expected_count[np.nonzero(z.eval().ravel())] += 1

    # Set things up to measure the same thing with nce_loss

    x = C.input_variable(xdim, needs_gradient=True)
    y = C.input_variable(classes, is_sparse=True)

    x0 = np.arange(batch * xdim, dtype=dt).reshape((batch, xdim))/(batch * xdim)
    # One label per sample, stored as a CSR matrix with a single 1 per row
    # at columns 10, 20, ..., 10 * batch.
    data = np.ones(batch, dtype=dt)
    indices = list(range(10, 10*batch+1, 10))
    indptr = list(range(batch+1))
    y0 = scipy.sparse.csr_matrix((data, indices, indptr), shape=(batch, classes))

    b = C.parameter((classes, 1))
    W = C.parameter((classes, C.InferredDimension))

    # Multiplying by a 1x1 identity turns the gradient value returned with
    # as_numpy=False into something whose non-zeros can be read via eval().
    gb = np.zeros(classes)
    vb = C.input_variable((classes, 1), dtype=dt)
    Ib = C.constant(np.eye(1, dtype=dt))
    zb = C.times(vb, Ib)

    loss = C.nce_loss(W, b, x, y, q, seed=98052)
    for _ in range(trials):
        v = loss.grad({x: x0, y: y0}, wrt=loss.parameters, as_numpy=False)
        gb[np.nonzero(zb.eval({vb: v[b]}).ravel())] += 1
    for i in range(classes):
        assert gb[i] == expected_count[i] or (i in indices and gb[i] == trials)
예제 #44
0
def test_ext_eval_3_no_input():
    """Evaluate ``MyPlus(p, 3) + 0`` on a graph that has no input variable."""
    dim = 4
    p = C.parameter(shape=(dim,), init=10, name='p')
    plus_three = C.user_function(MyPlus(p, C.constant(3)))
    z = plus_three + 0

    # No batch dimension since we have no input
    expected = np.zeros_like(p) + 10 + 3
    assert np.allclose(z.eval(), expected)
예제 #45
0
def test_sequence_unpack_with_convolution(device_id, precision):
    """Convolve an unpacked, reshaped sequence and check the output shape."""
    x = C.sequence.input((20, 20))
    unpacked = C.sequence.unpack(x, 0, no_mask_output=True)
    as_channels = C.reshape(unpacked, (3, 20, 20))
    kernel = C.constant(1.0, (4, 3, 3, 3))
    conv = C.convolution(kernel, as_channels, auto_padding=[False, True, True])

    # Two sequences, each with three 20x20 steps.
    val = np.random.random((2, 3, 20, 20)).astype(np.float32)
    out = conv.eval({x: val})
    assert np.array_equal(out.shape, (2, 4, 20, 20))
예제 #46
0
    def returnFunction():
        """Return ``times(placeholder, right)`` with the placeholder replaced by a constant."""
        lhs_value = [[10,2]]
        rhs_value = [[2],[3]]

        slot = placeholder(shape=(1,2))
        product = times(slot, rhs_value)
        lhs = constant(lhs_value)

        return product.replace_placeholders({slot: lhs})
예제 #47
0
def test_ext_eval_4_b_inside_graph():
    """Evaluate a user function wrapping ``p * MyPlus(p, 3)`` with no input variable."""
    dim, p_init = 4, 10
    p = C.parameter(shape=(dim,), init=p_init, name='p')
    z = C.user_function(p * MyPlus(p, C.constant(3)))

    result = z.eval()
    # No batch dimension since we have no input
    expected = ((p_init * np.ones_like(result)) + 3) * p_init
    assert np.allclose(result, expected)
예제 #48
0
def test_gather_op(device_id, precision):
    """Exercise ``C.gather``: forward values, gradients, and memory reuse.

    Covers gathering with 2x1 and 2x2 index inputs, the gradient with
    respect to the reference tensor, indices that are scaled by a learnable
    parameter (whose gradient must be zero), and a small training step in
    which only the gathered rows of the weight matrix may change.
    """
    a_data = [AA([[0],[1]], dtype=PRECISION_TO_TYPE[precision]),
              AA([[3],[4]], dtype=PRECISION_TO_TYPE[precision])]
    a = C.input_variable((2,1))
    r_data = np.arange(12).reshape(6,2).astype('f')
    # Pass the array's shape; the original passed the ``.data`` buffer here.
    r = C.parameter(shape=r_data.shape, init=r_data)
    res = C.gather(r, a).eval({a:a_data})
    expectd = np.asarray([[[[0., 1.]],[[2., 3.]]],[[[6., 7.]],[[8.,9.]]]])
    assert np.array_equal(res, expectd)

    grads = C.gather(r, a).grad({a:a_data}, [r])
    expectd_grad = np.asarray([[1,1],[1,1],[0,0],[1,1],[1,1],[0,0]], dtype=np.float32)
    assert np.array_equal(grads, expectd_grad)

    # gather with indices from a learnable parameter (no gradients should be
    # passed through the indices -- 0s should be passed)
    indices_params = C.parameter(shape=(1,), init=1.0)
    grads = C.gather(r, (indices_params *a)).grad({a:a_data}, [r, indices_params])
    assert np.array_equal(grads[r], expectd_grad)
    assert np.array_equal(grads[indices_params], np.asarray([0.0], dtype=np.float32))


    b_data = [AA([[0,2],[1,3]], dtype=PRECISION_TO_TYPE[precision]),
              AA([[2,4],[3,5]], dtype=PRECISION_TO_TYPE[precision])]
    b = C.input_variable((2,2))
    res2 = C.gather(r, b).eval({b:b_data})

    expectd2 = np.asarray([[[[0., 1.],[4.,5.]],[[2., 3.],[6., 7.]]],[[[4., 5.],[8.,9.]],[[6., 7.], [10., 11.]]]])
    assert np.array_equal(res2, expectd2)

    # the following small model is to test the memory reuse issue of gather node.
    x = C.input((3, 4))
    x1 = C.to_sequence(x)
    w = C.parameter((5, 6), init=1)
    z = C.gather(w, x1)
    assert z.shape == (4, 6)
    # need the unpack node to trigger memory reuse.
    f = C.sequence.unpack(z, 0, no_mask_output=True)
    y = C.input((3, 4, 6))
    loss = C.reduce_mean(C.square(f - y), axis=-1)
    loss = C.reduce_mean(loss, axis=C.Axis.all_axes())

    g = C.constant(0, shape=w.shape)
    u = C.assign(w, g + 1)
    learner = C.cntk_py.universal_learner([w], [g], u)
    trainer = C.trainer.Trainer(loss, [loss], [learner])
    indices = np.asarray([[[1, 2, 1, 2]]])
    # Local names chosen so that the ``input`` builtin is not shadowed.
    batch_indices = np.repeat(np.repeat(indices, 3, axis=1), 10, axis=0)
    targets = np.full((10, 3, 4, 6), 2)
    trainer.train_minibatch({x: batch_indices, y: targets})
    # the 2nd and 3rd rows should be updated by gradients.
    assert np.mean(w.value[1, :]) < 1
    assert np.mean(w.value[2, :]) < 1
    # the other three rows should keep as 1
    assert np.isclose(np.mean(w.value[0, :]), 1)
    assert np.isclose(np.mean(w.value[3, :]), 1)
    assert np.isclose(np.mean(w.value[4, :]), 1)
예제 #49
0
def test_ext_eval_1():
    """Evaluate ``MyPlus(i, 3) + p`` for one random sequence input."""
    dim = 4
    p = C.parameter(shape=(dim,), init=10, name='p')
    i = C.sequence.input_variable(dim, needs_gradient=True, name='i_var')
    plus_three = C.user_function(MyPlus(i, C.constant(3)))
    z = plus_three + p

    sample = np.random.rand(dim)
    output = z.eval([sample])
    assert np.allclose(output[0][0], sample + 3 + 10)
예제 #50
0
def test_Concat(tmpdir):
    """Verify ``C.splice`` on axis 1, first with two constants, then with one input."""
    # array of shape (1, 2, 2)
    data1 = np.asarray([[[1, 2], [4, 5]]], dtype=np.float32)
    # array of shape (1, 3, 2)
    data2 = np.asarray([[[10, 20],
                         [30, 40],
                         [50, 60]]], dtype=np.float32)

    x = C.constant(value=data1)
    y = C.constant(value=data2)

    # splicing on axis=1 joins the 2 rows and the 3 rows into 5 rows
    verify_no_input(C.splice(x, y, axis=1), tmpdir, 'Concat_0')

    # same splice, but with the first operand fed as an input
    x = C.input_variable(data1.shape)
    verify_one_input(C.splice(x, y, axis=1), data1, tmpdir, 'Concat__1')
예제 #51
0
def test_ext_eval_5_times():
    """Evaluate ``times(MyPlus(p, 3), W)`` where ``W`` is a constant-initialised parameter."""
    dim, p_init = 2, 10
    p = C.parameter(shape=(dim,), init=p_init, name='p')
    plus_three = C.user_function(MyPlus(p, C.constant(3)))
    weights = C.parameter(shape=(2, 50), init=2)
    z = C.times(plus_three, weights)

    result = z.eval()
    # No batch dimension since we have no input
    expected = ((p_init * np.ones_like(result)) + 3) * 2 * 2
    assert np.allclose(result, expected)
예제 #52
0
def test_Gather(tmpdir, dtype):
    """Build a gather over a constant 6x2 table under the given default ``dtype``.

    The float16 case is currently skipped.
    """
    if dtype == np.float16:
        pytest.skip("TO BE FIXED")

    with C.default_options(dtype = dtype):
        index_data = np.asarray([[[0],[1]],[[4],[5]]]).astype(dtype)
        x = C.input_variable((2,1))
        table = C.constant(np.arange(12).reshape(6,2).astype(dtype))
        verify_one_input(C.gather(table, x), index_data, tmpdir, 'Gather_1')
예제 #53
0
def create_binary_convolution_model():
    """Build the binary-convolution network together with its loss and error metric.

    Reads ``num_channels``, ``image_height``, ``image_width`` and
    ``num_classes`` from module scope.

    Returns:
        ``C.combine([z, ce, pe])``: the scaled network output, the
        cross-entropy loss plus the binary regulariser, and the
        classification error.
    """
    # Input variables denoting the features and label data
    feature_var = C.input((num_channels, image_height, image_width))
    label_var = C.input(num_classes)

    # apply model to input
    scaled_input = C.element_times(C.constant(0.00390625), feature_var)

    # first layer is ok to be full precision
    z = C.layers.Convolution((3, 3), 32, pad=True, activation=C.relu)(scaled_input)
    z = C.layers.MaxPooling((3,3), strides=(2,2))(z)

    # two identical binary stages that differ only in their input channels
    for in_channels in (32, 128):
        z = C.layers.BatchNormalization(map_rank=1)(z)
        z = BinaryConvolution(z, (3,3), 128, channels=in_channels, pad=True)
        z = C.layers.MaxPooling((3,3), strides=(2,2))(z)

    # final 1x1 binary convolution down to the classes, then pool everything
    z = C.layers.BatchNormalization(map_rank=1)(z)
    z = BinaryConvolution(z, (1,1), num_classes, channels=128, pad=True)
    z = C.layers.AveragePooling((z.shape[1], z.shape[2]))(z)
    z = C.reshape(z, (num_classes,))

    # Add binary regularization (ala Gang Hua)
    weight_sum = C.constant(0)
    filters = [param for param in z.parameters if param.name == "filter"]
    for weights in filters:
        penalty = C.reduce_sum(C.minus(1, C.square(weights)))
        weight_sum = C.plus(weight_sum, penalty)
    bin_reg = C.element_times(.000005, weight_sum)

    # After the last layer, we need to apply a learnable scale
    SP = C.parameter(shape=z.shape, init=0.001)
    z = C.element_times(z, SP)

    # loss and metric
    ce = C.plus(C.cross_entropy_with_softmax(z, label_var), bin_reg)
    pe = C.classification_error(z, label_var)

    return C.combine([z, ce, pe])
예제 #54
0
 def lrn(x, depth_radius, bias, alpha, beta, name=''):
     """Divide ``x`` by ``(bias + conv(x**2)) ** beta`` using a constant window filter.

     The filter has ``2 * depth_radius + 1`` taps, each equal to
     ``alpha / (2 * depth_radius + 1)``.  ``dtype`` is taken from the
     enclosing scope; ``name`` is accepted but not used.
     """
     window = 2 * depth_radius + 1
     squared = C.square(x)
     # reshape to insert a fake singleton reduction dimension after the 3rd axis (channel axis).
     # Note Python axis order and BrainScript are reversed.
     squared_4d = C.reshape(squared, (1, C.InferredDimension), 0, 1)
     W = C.constant(alpha / window, shape=(1, window, 1, 1), dtype=dtype, name='W')
     # 3D convolution with a filter that has a non 1-size only in the 3rd axis,
     # and does not reduce since the reduction dimension is fake and 1
     windowed = C.convolution(W, squared_4d)
     # reshape back to remove the fake singleton reduction dimension
     windowed_3d = C.reshape(windowed, C.InferredDimension, 0, 2)
     # exp(beta * log(v)) == v ** beta
     denominator = C.exp(beta * C.log(bias + windowed_3d))
     return C.element_divide(x, denominator)
예제 #55
0
def train_eval_mnist_onelayer_from_file(criterion_name=None, eval_name=None):
    """Train and test a one-hidden-layer MNIST network read from text files.

    Args:
        criterion_name: name given to the cross-entropy criterion node.
        eval_name: name given to the square-error evaluation node.

    Returns:
        Whatever ``ctx.test`` returns for the criterion and evaluation nodes.
    """
    # Network definition
    feat_dim, label_dim, hidden_dim = 784, 10, 200

    data_dir = os.path.join(os.path.dirname(__file__), "Data")
    training_filename = os.path.join(data_dir, "Train-28x28_text.txt")
    test_filename = os.path.join(data_dir, "Test-28x28_text.txt")

    features = C.input(feat_dim)
    features.name = 'features'

    feats_scaled = C.element_times(features, C.constant(0.00390625))

    labels = C.input(label_dim)
    labels.tag = 'label'
    labels.name = 'labels'

    training_reader = C.CNTKTextFormatReader(training_filename)
    test_reader = C.CNTKTextFormatReader(test_filename)

    h1 = add_dnn_sigmoid_layer(feat_dim, hidden_dim, feats_scaled, 1)
    out = add_dnn_layer(hidden_dim, label_dim, h1, 1)
    out.tag = 'output'

    criterion = C.cross_entropy_with_softmax(labels, out)
    criterion.name = criterion_name
    criterion.tag = 'criterion'

    evaluation = C.ops.square_error(labels, out)
    evaluation.name = eval_name
    evaluation.tag = 'eval'

    def input_map(reader):
        # Both readers bind the same two streams to the same two inputs.
        with_labels = reader.map(labels, alias='labels', dim=label_dim)
        return with_labels.map(features, alias='features', dim=feat_dim)

    # Specify the training parameters (settings are scaled down)
    my_sgd = C.SGDParams(epoch_size=600, minibatch_size=32,
                         learning_rates_per_mb=0.1, max_epochs=5, momentum_per_mb=0)

    roots = [criterion, evaluation]

    # Create a context or re-use if already there
    with C.LocalExecutionContext('mnist_one_layer', clean_up=True) as ctx:
        # CNTK actions
        ctx.train(
            root_nodes=roots,
            training_params=my_sgd,
            input_map=input_map(training_reader))

        return ctx.test(
            root_nodes=roots,
            input_map=input_map(test_reader))
예제 #56
0
파일: __init__.py 프로젝트: ssokhey/CNTK
def hierarchical_softmax_layer(input_var, label_index, label_dim, label_classes=None):
    '''
    A two-layer hierarchical softmax function.

    Labels are split into ``label_classes`` groups of
    ``ceil(label_dim / label_classes)`` outputs each.  The probability of a
    label is the probability of its group times the probability of the
    label inside that group.

    Args:
        input_var: Variable with shape: [#,*](dim_x)
        label_index: index of label's category:  [#,*](1)
        label_dim: number of the label categories
        label_classes: number of classes of the label categories; when not
            given, ``ceil(sqrt(label_dim))`` is used
    Returns:
        output_prob: the probability of the given label [#,*](1)
        class_probs: the probability of all the label classes [#,*](label_classes)
        all_probs: list with one entry per label class, holding the
            probabilities of that class's outputs
    '''
    input_dim = input_var.shape[0]

    if not label_classes:
        label_classes = int(np.ceil(np.sqrt(float(label_dim))))

    n_outputs_per_class = int(np.ceil(label_dim / label_classes))

    # Split the label index into (class, position inside the class).
    target_class = C.floor((label_index + 0.5) / n_outputs_per_class)
    target_output_in_class = C.round(label_index - target_class * n_outputs_per_class)

    # First level: one softmax over the classes.
    w1 = parameter(shape=(input_dim, label_classes), init=C.glorot_normal(), name='hsoftmax_w1')
    b1 = parameter(shape=(label_classes), init=C.glorot_normal(), name='hsoftmax_b1')
    # Second level: one weight matrix and one bias vector per class.
    w2s = parameter(shape=(label_classes, input_dim, n_outputs_per_class,), init=C.glorot_normal(), name='hsoftmax_w2s')
    b2s = parameter(shape=(label_classes, n_outputs_per_class,), init=C.glorot_normal(), name='hsoftmax_b2s')

    class_probs = softmax(b1 + times(input_var, w1))

    # TODO: fix the bug in backprop for sparse, and use sparse embedding to accelerate
    target_class_one_hot = C.one_hot(target_class, num_classes=label_classes, sparse_output=False)
    # Select the second-level weights and bias of the target class.
    selected_w2 = C.times(target_class_one_hot, w2s, output_rank=2)
    w2 = C.reshape(selected_w2, [input_dim, -1])
    selected_b2 = times(target_class_one_hot, b2s, output_rank=1)
    b2 = C.reshape(selected_b2, [-1])
    probs_in_class = softmax(b2 + times(input_var, w2))

    output_one_hot = C.one_hot(target_output_in_class, num_classes=n_outputs_per_class, sparse_output=False)
    prob_in_class = C.times_transpose(output_one_hot, probs_in_class)
    class_one_hot = C.one_hot(target_class, num_classes=label_classes, sparse_output=False)
    class_prob = C.times_transpose(class_one_hot, class_probs)
    output_prob = prob_in_class * class_prob

    # this is for calculating all the outputs' probabilities
    all_probs = []
    for class_id in range(label_classes):
        ci_one_hot = C.one_hot(C.constant(class_id), num_classes=label_classes, sparse_output=False)
        class_w2 = C.times(ci_one_hot, w2s, output_rank=2)
        class_b2 = C.times(ci_one_hot, b2s, output_rank=1)
        within_class = C.softmax(class_b2 + times(input_var, class_w2))
        of_class = C.times_transpose(ci_one_hot, class_probs)
        all_probs.append(within_class * of_class)

    return output_prob, class_probs, all_probs
예제 #57
0
def test_udf_clone():
    # Checks that a graph containing a user-defined function still evaluates
    # after being cloned with the 'share' option.
    dim = 4
    i = C.sequence.input_variable(dim, needs_gradient=True, name='i_var')
    m_udf = C.user_function(MyPlus(i, C.constant(3)))
    p = C.parameter(shape=(dim,), init=10, name='p')
    z = m_udf + p

    z_clone = z.clone('share')

    input_data = np.random.rand(dim)
    # The clone is evaluated, not the original graph.
    result = z_clone.eval([input_data])
    # Expected value: input plus the constant 3 plus the parameter's init value 10.
    assert np.allclose(result[0][0], input_data + 3 + 10)