Example 1
def test_random_sample_with_replacement(weights, num_samples, tolerance,
                                        raises_exception, device_id,
                                        precision):

    weights = AA(weights, precision)
    expected_relative_frequency = weights / np.sum(weights)
    num_calls = 10
    identity = np.identity(weights.size)
    allow_duplicates = True  # sample with replacement

    if raises_exception:
        with pytest.raises(ValueError):
            result = random_sample(weights, num_samples, allow_duplicates)
            result.eval()
    else:
        observed_frequency = np.zeros_like(weights)  # start from zero; counts are accumulated below
        for i in range(0, num_calls):
            result = random_sample(weights, num_samples, allow_duplicates)
            denseResult = times(result, identity)
            observed_frequency += np.sum(denseResult.eval(), 0)
        observed_relative_frequency = observed_frequency / \
            (num_calls * num_samples)
        assert np.allclose(observed_relative_frequency,
                           expected_relative_frequency,
                           atol=tolerance)
Example 2
def test_random_sample_with_explicit_seed(device_id, precision):
    weights = AA([x for x in range(0, 10)], precision)
    identity = np.identity(weights.size)
    allow_duplicates = False  # sample without replacement
    num_samples = 5
    seed = 123
    to_dense = lambda x: times(x, identity).eval()
    result1 = to_dense(random_sample(weights, num_samples, allow_duplicates, seed))
    result2 = to_dense(random_sample(weights, num_samples, allow_duplicates, seed))
    result3 = to_dense(random_sample(weights, num_samples, allow_duplicates, seed+1))
    result4 = to_dense(random_sample(weights, num_samples, allow_duplicates))
    assert np.allclose(result1, result2)
    assert not np.allclose(result1, result3)
    assert not np.allclose(result1, result4)
Example 3
def test_random_sample_without_replacement(weights, num_samples, expected_count, tolerance, raises_exception, device_id, precision):

    weights = AA(weights, precision)
    identity = np.identity(weights.size)
    allow_duplicates = False  # sample without replacement

    if raises_exception:
        with pytest.raises(ValueError):
            result = random_sample(weights, num_samples, allow_duplicates)
            result.eval()
    else:
        result = random_sample(weights, num_samples, allow_duplicates)
        denseResult = times(result, identity)
        observed_count = np.sum(denseResult.eval(), 0)
        assert np.allclose(observed_count, expected_count, atol=tolerance)
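
The expected counts checked above can themselves be obtained from cntk.random_sample_inclusion_frequency, which returns, for each class, the expected number of times it appears in a draw of num_samples samples. Below is a minimal standalone sketch (hedged illustration with made-up weights, not part of the test suite) that contrasts the analytic expectation with an empirical estimate:

import numpy as np
import cntk as C

weights = np.array([1., 2., 3., 4.], dtype=np.float32)
num_samples = 2

# Expected number of occurrences of each class when sampling without replacement.
expected = C.random_sample_inclusion_frequency(weights, num_samples, allow_duplicates=False).eval()

# Empirical counts: each draw yields a sparse selector with one one-hot row per sample;
# multiplying by an identity matrix densifies it, as in the tests above.
identity = np.identity(weights.size)
counts = np.zeros_like(weights)
trials = 1000
for _ in range(trials):
    selector = C.random_sample(weights, num_samples, allow_duplicates=False)
    counts += np.sum(C.times(selector, identity).eval(), 0)

print(expected)
print(counts / trials)  # should approach `expected` as `trials` grows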
Example 4
def test_random_sample_with_explicit_seed(device_id, precision):
    weights = AA([x for x in range(0, 10)], precision)
    identity = np.identity(weights.size)
    allow_duplicates = False  # sample without replacement
    num_samples = 5
    seed = 123
    to_dense = lambda x: times(x, identity).eval()
    result1 = to_dense(
        random_sample(weights, num_samples, allow_duplicates, seed))
    result2 = to_dense(
        random_sample(weights, num_samples, allow_duplicates, seed))
    result3 = to_dense(
        random_sample(weights, num_samples, allow_duplicates, seed + 1))
    result4 = to_dense(random_sample(weights, num_samples, allow_duplicates))
    assert np.allclose(result1, result2)
    assert not np.allclose(result1, result3)
    assert not np.allclose(result1, result4)
Example 5
def cross_entropy_with_sampled_softmax(
    hidden_vector,  # Node providing the output of the recurrent layers
    target_vector,  # Node providing the expected labels (as sparse vectors)
    vocab_dim,  # Vocabulary size
    hidden_dim,  # Dimension of the hidden vector
    num_samples,  # Number of samples to use for sampled softmax
    sampling_weights,  # Node providing weights to be used for the weighted sampling
    allow_duplicates=False  # Boolean flag to control whether to use sampling with replacement (allow_duplicates == True) or without replacement.
):
    bias = C.layers.Parameter(shape=(vocab_dim, 1), init=0)
    weights = C.layers.Parameter(shape=(vocab_dim, hidden_dim),
                                 init=C.initializer.glorot_uniform())

    sample_selector_sparse = C.random_sample(
        sampling_weights, num_samples,
        allow_duplicates)  # sparse matrix [num_samples * vocab_size]
    if use_sparse:  # 'use_sparse' is assumed to be defined in the enclosing scope (e.g. a module-level flag)
        sample_selector = sample_selector_sparse
    else:
        # Note: Sampled softmax with dense data is only supported for debugging purposes.
        # It can easily run into memory issues, as the matrix 'I' below can be quite large.
        # In case we want a dense representation of all data, we have to convert the sample selector.
        I = C.Constant(np.eye(vocab_dim, dtype=np.float32))
        sample_selector = C.times(sample_selector_sparse, I)

    inclusion_probs = C.random_sample_inclusion_frequency(
        sampling_weights, num_samples,
        allow_duplicates)  # dense row [1 * vocab_size]
    log_prior = C.log(inclusion_probs)  # dense row [1 * vocab_dim]

    print("hidden_vector: " + str(hidden_vector.shape))
    wS = C.times(sample_selector, weights,
                 name='wS')  # [num_samples * hidden_dim]
    print("ws:" + str(wS.shape))
    zS = C.times_transpose(wS, hidden_vector, name='zS1') + C.times(
        sample_selector, bias, name='zS2') - C.times_transpose(
            sample_selector, log_prior, name='zS3')  # [num_samples]

    # Getting the weight vector for the true label. Dimension hidden_dim
    wT = C.times(target_vector, weights, name='wT')  # [1 * hidden_dim]
    zT = C.times_transpose(wT, hidden_vector, name='zT1') + C.times(
        target_vector, bias, name='zT2') - C.times_transpose(
            target_vector, log_prior, name='zT3')  # [1]

    zSReduced = C.reduce_log_sum_exp(zS)

    # Compute the cross entropy that is used for training.
    # We don't check whether any of the classes in the random samples coincides with the true label, so it might happen that the true class is counted
    # twice in the normalizing denominator of sampled softmax.
    cross_entropy_on_samples = C.log_add_exp(zT, zSReduced) - zT

    # For applying the model we also output a node providing the input for the full softmax
    z = C.times_transpose(weights, hidden_vector) + bias
    z = C.reshape(z, shape=(vocab_dim))

    zSMax = C.reduce_max(zS)
    error_on_samples = C.less(zT, zSMax)
    return (z, cross_entropy_on_samples, error_on_samples)
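
For context, here is a hedged sketch of how a function like the one above is typically wired into a recurrent language model. All names and dimensions below (token_freqs, the layer stack, vocab_dim=1000, etc.) are illustrative assumptions, not taken from the example code:

import numpy as np
import cntk as C

use_sparse = True                      # flag referenced inside the function above
vocab_dim, hidden_dim, num_samples = 1000, 128, 32

input_sequence = C.sequence.input_variable(shape=vocab_dim, is_sparse=use_sparse)
label_sequence = C.sequence.input_variable(shape=vocab_dim, is_sparse=use_sparse)

# Sampling weights, e.g. smoothed unigram frequencies (placeholder values here).
token_freqs = np.ones(vocab_dim, dtype=np.float32)
smoothed = np.power(token_freqs, 0.75).astype(np.float32)
sampling_weights = C.reshape(C.Constant(smoothed), shape=(1, vocab_dim))

# One latent vector per token position, produced by an embedding + LSTM stack.
latent = C.layers.Sequential([
    C.layers.Embedding(hidden_dim),
    C.layers.Recurrence(C.layers.LSTM(hidden_dim))])(input_sequence)

z, cross_entropy, error = cross_entropy_with_sampled_softmax(
    latent, label_sequence, vocab_dim, hidden_dim, num_samples, sampling_weights)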
Example 6
def test_random_sample_without_replacement(weights, num_samples,
                                           expected_count, tolerance,
                                           raises_exception, device_id,
                                           precision):

    weights = AA(weights, precision)
    identity = np.identity(weights.size)
    allow_duplicates = False  # sample without replacement

    if raises_exception:
        with pytest.raises(ValueError):
            result = random_sample(weights, num_samples, allow_duplicates)
            result.eval()
    else:
        result = random_sample(weights, num_samples, allow_duplicates)
        denseResult = times(result, identity)
        observed_count = np.sum(denseResult.eval(), 0)
        assert np.allclose(observed_count, expected_count, atol=tolerance)
Example 7
def test_random_sample_with_replacement(weights, num_samples, tolerance, raises_exception, device_id, precision):

    weights = AA(weights, precision)
    expected_relative_frequency = weights / np.sum(weights)
    num_calls = 10
    identity = np.identity(weights.size)
    allow_duplicates = True  # sample with replacement

    if raises_exception:
        with pytest.raises(ValueError):
            result = random_sample(weights, num_samples, allow_duplicates)
            result.eval()
    else:
        observed_frequency = np.zeros_like(weights)  # start from zero; counts are accumulated below
        for i in range(0, num_calls):
            result = random_sample(weights, num_samples, allow_duplicates)
            denseResult = times(result, identity)
            observed_frequency += np.sum(denseResult.eval(), 0)
        observed_relative_frequency = observed_frequency / \
            (num_calls * num_samples)
        assert np.allclose(observed_relative_frequency,
                           expected_relative_frequency, atol=tolerance)
Example 8
def test_set_rng_seed_attribute():
    from cntk import random_sample, input

    random_sample_node = random_sample(input(1), 1, True, seed=123)
    key = 'rngSeed'

    root = random_sample_node.root_function
    assert root.attributes[key] == 123
    
    root.set_attribute(key, 11530328594546889191)
    assert root.attributes[key] == 11530328594546889191

    random_sample_node.set_attribute(key, 2**31)
    assert root.attributes[key] == 2**31
Example 9
def cross_entropy_with_sampled_softmax(
    hidden_vector,           # Node providing the output of the recurrent layers
    target_vector,           # Node providing the expected labels (as sparse vectors)
    vocab_dim,               # Vocabulary size
    hidden_dim,              # Dimension of the hidden vector
    num_samples,             # Number of samples to use for sampled softmax
    sampling_weights,        # Node providing weights to be used for the weighted sampling
    allow_duplicates = False # Boolean flag to control whether to use sampling with replacement (allow_duplicates == True) or without replacement.
    ):
    bias = C.Parameter(shape = (vocab_dim, 1), init = 0)
    weights = C.Parameter(shape = (vocab_dim, hidden_dim), init = C.initializer.glorot_uniform())

    sample_selector_sparse = C.random_sample(sampling_weights, num_samples, allow_duplicates) # sparse matrix [num_samples * vocab_size]
    if use_sparse:  # 'use_sparse' is assumed to be defined in the enclosing scope (e.g. a module-level flag)
        sample_selector = sample_selector_sparse
    else:
        # Note: Sampled softmax with dense data is only supported for debugging purposes.
        # It can easily run into memory issues, as the matrix 'I' below can be quite large.
        # In case we want a dense representation of all data, we have to convert the sample selector.
        I = C.Constant(np.eye(vocab_dim, dtype=np.float32))
        sample_selector = C.times(sample_selector_sparse, I)

    inclusion_probs = C.random_sample_inclusion_frequency(sampling_weights, num_samples, allow_duplicates) # dense row [1 * vocab_size]
    log_prior = C.log(inclusion_probs) # dense row [1 * vocab_dim]


    print("hidden_vector: "+str(hidden_vector.shape))
    wS = C.times(sample_selector, weights, name='wS') # [num_samples * hidden_dim]
    print("ws:"+str(wS.shape))
    zS = C.times_transpose(wS, hidden_vector, name='zS1') + C.times(sample_selector, bias, name='zS2') - C.times_transpose(sample_selector, log_prior, name='zS3') # [num_samples]

    # Getting the weight vector for the true label. Dimension hidden_dim
    wT = C.times(target_vector, weights, name='wT') # [1 * hidden_dim]
    zT = C.times_transpose(wT, hidden_vector, name='zT1') + C.times(target_vector, bias, name='zT2') - C.times_transpose(target_vector, log_prior, name='zT3') # [1]


    zSReduced = C.reduce_log_sum_exp(zS)

    # Compute the cross entropy that is used for training.
    # We don't check whether any of the classes in the random samples coincides with the true label, so it might happen that the true class is counted
    # twice in the normalizing denominator of sampled softmax.
    cross_entropy_on_samples = C.log_add_exp(zT, zSReduced) - zT

    # For applying the model we also output a node providing the input for the full softmax
    z = C.times_transpose(weights, hidden_vector) + bias
    z = C.reshape(z, shape = (vocab_dim))

    zSMax = C.reduce_max(zS)
    error_on_samples = C.less(zT, zSMax)
    return (z, cross_entropy_on_samples, error_on_samples)
Example 10
def test_set_rng_seed_attribute():
    from cntk import random_sample, input

    random_sample_node = random_sample(input(1), 1, True, seed=123)
    key = 'rngSeed'

    root = random_sample_node.root_function
    assert root.attributes[key] == 123
    
    root.set_attribute(key, 11530328594546889191)
    assert root.attributes[key] == 11530328594546889191

    random_sample_node.set_attribute(key, 2**31)
    assert root.attributes[key] == 2**31
Example 11
def test_nce_backward_indices(classes, xdim, batch, expected_value, device_id,
                              precision):
    """
    Simple test that makes sure that the derivatives have the correct sparsity pattern
    """

    # ignore precision, only sparsity pattern matters for this test
    dt = np.float32

    from cntk.losses import nce_loss
    import scipy
    trials = 10

    # Establish baseline
    expected_count = np.zeros(classes)
    I = C.constant(np.eye(classes, dtype=dt))
    q = np.arange(classes, dtype=dt) + 1
    z = C.reduce_sum(C.times(C.random_sample(q, 32, True, seed=98052), I),
                     axis=0)
    for i in range(trials):
        expected_count[np.nonzero(z.eval().ravel())] += 1

    # Set things up to measure the same thing with nce_loss

    x = C.input_variable(xdim, needs_gradient=True)
    y = C.input_variable(classes, is_sparse=True)

    x0 = np.arange(batch * xdim, dtype=dt).reshape(
        (batch, xdim)) / (batch * xdim)
    data = np.ones(batch, dtype=dt)
    indices = list(range(10, 10 * batch + 1, 10))
    indptr = list(range(batch + 1))
    y0 = scipy.sparse.csr_matrix((data, indices, indptr),
                                 shape=(batch, classes))

    b = C.parameter((classes, 1))
    W = C.parameter((classes, C.InferredDimension))

    gb = np.zeros(classes)
    vb = C.input_variable((classes, 1), dtype=dt)
    Ib = C.constant(np.eye(1, dtype=dt))
    zb = C.times(vb, Ib)

    loss = C.nce_loss(W, b, x, y, q, seed=98052)
    for i in range(trials):
        v = loss.grad({x: x0, y: y0}, wrt=loss.parameters, as_numpy=False)
        gb[np.nonzero(zb.eval({vb: v[b]}).ravel())] += 1
    for i in range(classes):
        assert gb[i] == expected_count[i] or (i in indices and gb[i] == trials)
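
For reference, a hedged sketch of how nce_loss is usually set up as a training criterion; dimensions and data below are arbitrary illustrations. The baseline in the test above draws 32 samples per call, which appears to match the criterion's default sample count:

import numpy as np
import scipy.sparse
import cntk as C

classes, xdim, batch = 100, 20, 8

x = C.input_variable(xdim)
y = C.input_variable(classes, is_sparse=True)      # one-hot labels
q = np.arange(classes, dtype=np.float32) + 1       # noise (sampling) distribution

b = C.parameter((classes, 1))
W = C.parameter((classes, C.InferredDimension))    # second dimension inferred as xdim

loss = C.nce_loss(W, b, x, y, q, seed=98052)       # NCE training criterion

# One-hot labels as a scipy CSR matrix: row i has a single 1 at column i.
x0 = np.random.rand(batch, xdim).astype(np.float32)
y0 = scipy.sparse.csr_matrix(
    (np.ones(batch, dtype=np.float32), np.arange(batch), np.arange(batch + 1)),
    shape=(batch, classes))
print(loss.eval({x: x0, y: y0}))                   # one loss value per sample in the batch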
Example 12
def test_nce_backward_indices(classes, xdim, batch, expected_value, device_id, precision):
    """
    Simple test that makes sure that the derivatives have the correct sparsity pattern
    """

    # ignore precision, only sparsity pattern matters for this test
    dt = np.float32

    from cntk.losses import nce_loss
    import scipy
    trials = 10

    # Establish baseline
    expected_count = np.zeros(classes)
    I = C.constant(np.eye(classes, dtype=dt))
    q = np.arange(classes, dtype=dt) + 1
    z = C.reduce_sum(C.times(C.random_sample(q, 32, True, seed=98052), I), axis=0)
    for i in range(trials):
        expected_count[np.nonzero(z.eval().ravel())] += 1

    # Set things up to measure the same thing with nce_loss

    x = C.input_variable(xdim, needs_gradient=True)
    y = C.input_variable(classes, is_sparse=True)

    x0 = np.arange(batch * xdim, dtype=dt).reshape((batch, xdim))/(batch * xdim)
    data = np.ones(batch, dtype=dt)
    indices = list(range(10,10*batch+1,10))
    indptr = list(range(batch+1))
    y0 = scipy.sparse.csr_matrix((data, indices, indptr), shape=(batch, classes))

    b = C.parameter((classes, 1))
    W = C.parameter((classes, C.InferredDimension))

    gb = np.zeros(classes)
    vb = C.input_variable((classes, 1), dtype=dt)
    Ib = C.constant(np.eye(1, dtype=dt))
    zb = C.times(vb, Ib)

    loss = C.nce_loss(W, b, x, y, q, seed=98052)
    for i in range(trials):
        v = loss.grad({x: x0, y: y0}, wrt=loss.parameters, as_numpy=False)
        gb[np.nonzero(zb.eval({vb: v[b]}).ravel())] += 1
    for i in range(classes):
        assert gb[i] == expected_count[i] or (i in indices and gb[i] == trials)
Example 13
def cross_entropy_with_sampled_softmax(
    hidden_vector,          
    label_vector,           
    vocab_dim,              
    hidden_dim,             
    num_samples,            
    sampling_weights,       
    allow_duplicates = False 
    ):

    bias = C.layers.Parameter(shape=(vocab_dim, 1), init=0)
    weights = C.layers.Parameter(shape=(vocab_dim, hidden_dim), init=C.initializer.glorot_uniform())

    sample_selector_sparse = C.random_sample(sampling_weights, num_samples, allow_duplicates)
    sample_selector = sample_selector_sparse

    inclusion_probs = C.random_sample_inclusion_frequency(sampling_weights, num_samples, allow_duplicates)
    log_prior = C.log(inclusion_probs)

    wS = C.times(sample_selector, weights, name='wS')
    zS = C.times_transpose(wS, hidden_vector, name='zS1') + C.times(sample_selector, bias, name='zS2') - C.times_transpose(sample_selector, log_prior, name='zS3')

    # Getting the weight vector for the true label. Dimension hidden_dim
    wT = C.times(label_vector, weights, name='wT')
    zT = C.times_transpose(wT, hidden_vector, name='zT1') + C.times(label_vector, bias, name='zT2') - C.times_transpose(label_vector, log_prior, name='zT3')

    zSReduced = C.reduce_log_sum_exp(zS)

    # Compute the cross entropy that is used for training.
    cross_entropy_on_samples = C.log_add_exp(zT, zSReduced) - zT

    # For applying the model we also output a node providing the input for the full softmax
    z = C.times_transpose(weights, hidden_vector) + bias
    z = C.reshape(z, shape=(vocab_dim))

    zSMax = C.reduce_max(zS)
    error_on_samples = C.less(zT, zSMax)

    return (z, cross_entropy_on_samples, error_on_samples)
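
For comparison, a hedged sketch of a full-softmax counterpart using the same parameter shapes and naming conventions as the sampled version above. This is an illustration only, not taken verbatim from any library code:

def cross_entropy_with_full_softmax(hidden_vector, label_vector, vocab_dim, hidden_dim):
    bias = C.layers.Parameter(shape=(vocab_dim, 1), init=0)
    weights = C.layers.Parameter(shape=(vocab_dim, hidden_dim), init=C.initializer.glorot_uniform())

    # Scores over the full vocabulary.
    z = C.reshape(C.times_transpose(weights, hidden_vector) + bias, shape=(1, vocab_dim))

    # Score of the true label and the standard softmax cross entropy:
    # ce = log(sum_j exp(z_j)) - z_label
    zT = C.times_transpose(label_vector, z)
    cross_entropy = C.reduce_log_sum_exp(z) - zT

    zMax = C.reduce_max(z)
    error_on_samples = C.less(zT, zMax)

    return (z, cross_entropy, error_on_samples)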