Example #1
def test_op_slice_sequence(input_data, slice_params, expected_result, device_id, precision):
    # Forward pass test
    # ==================
    # We compute the expected output for the forward pass.
    # The expected output needs two surrounding brackets:
    # the first for the sequence along the dynamic axis t,
    # the second for the batch of one sample.

    # 1 sample with 2 sequence elements, each a vector of 3

    t = C.dynamic_axis(name='t')
    a = I([input_data], dynamic_axis=t)

    # slice using the operator
    result = C.slice(a, slice_params[0], slice_params[1], axis='t')
    result = C.identity(result) # required hack because Slice doesn't propagate tag

    unittest_helper(result, None, [expected_result], device_id=device_id,
                    precision=precision, clean_up=False, backward_pass=False)

    # Backward pass test
    # ==================
    # The gradient of the slice operator is a tensor of the same shape as the
    # input tensor, having 1 for elements that were taken and 0 for elements
    # that were dropped.

    def grad_slice(x, beg_index, end_index):
        res = np.zeros_like(x)
        res[beg_index:end_index] = 1
        return res

    expected_gradient = grad_slice(np.asarray(input_data), *slice_params)
    
    unittest_helper(result, None, [expected_gradient], device_id=device_id,
                    precision=precision, clean_up=True, backward_pass=True, input_node=a)
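For reference, a minimal NumPy-only sketch of the values this test checks, using hypothetical input data (one sequence of 2 elements, each a vector of 3):

import numpy as np

# Hypothetical values, matching the shape described in the comments above.
input_data = np.asarray([[1., 2., 3.],
                         [4., 5., 6.]])
beg_index, end_index = 0, 1  # keep only the first sequence element

# Forward: slicing along the sequence axis keeps rows [beg_index:end_index).
forward = input_data[beg_index:end_index]        # -> [[1., 2., 3.]]

# Backward: the gradient is 1 where elements were kept and 0 where they were
# dropped, exactly as grad_slice() above computes it.
gradient = np.zeros_like(input_data)
gradient[beg_index:end_index] = 1
assert gradient.tolist() == [[1., 1., 1.], [0., 0., 0.]]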
Example #2
def seqcla():

    # LSTM params
    input_dim = 50
    output_dim = 128
    cell_dim = 128
    
    # model
    num_labels = 5
    vocab = 2000
    embed_dim = 50    

    t = C.dynamic_axis(name='t')
    features = C.sparse_input(vocab, dynamic_axis=t, name='features')    
    labels = C.input(num_labels, name='labels')
   
    train_reader = C.CNTKTextFormatReader(train_file)

    # setup embedding matrix
    embedding = C.parameter((embed_dim, vocab), learning_rate_multiplier=0.0, 
                             init_from_file_path=embedding_file)

    # get the vector representing the word
    sequence = C.times(embedding, features, name='sequence')
    
    # add an LSTM layer
    L = lstm_layer(output_dim, cell_dim, sequence, input_dim)
    
    # add a softmax layer on top
    w = C.parameter((num_labels, output_dim), name='w')
    b = C.parameter((num_labels), name='b')
    z = C.times(w, L) + b
    z.name = 'z'
    z.tag = "output"
    
    # and reconcile the shared dynamic axis
    pred = C.reconcile_dynamic_axis(z, labels, name='pred')    
    
    ce = C.cross_entropy_with_softmax(labels, pred)
    ce.tag = "criterion"
    
    my_sgd = C.SGDParams(epoch_size=0, minibatch_size=10, learning_rates_per_mb=0.1, max_epochs=3)    
    
    with C.LocalExecutionContext('seqcla') as ctx:
        # train the model
        ctx.train(root_nodes=[ce], training_params=my_sgd, input_map=train_reader.map(
                  features, alias='x', dim=vocab, format='Sparse').map(
                  labels, alias='y', dim=num_labels, format='Dense'))        
        
        # write out the predictions
        ctx.write(input_map=train_reader.map(
                  features, alias='x', dim=vocab, format='Sparse').map(
                  labels, alias='y', dim=num_labels, format='Dense'))
                  
        # do some manual accuracy testing
        acc = calc_accuracy(train_file, ctx.output_filename_base)
        
        # and test for the same number...
        TOLERANCE_ABSOLUTE = 1E-02
        assert np.allclose(acc, 0.6006415396952687, atol=TOLERANCE_ABSOLUTE)
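The dense softmax layer at the end of seqcla() is plain matrix algebra; below is a small NumPy sanity check of its shapes (the random values are illustrative only and not part of the example):

import numpy as np

output_dim, num_labels = 128, 5              # same dimensions as in seqcla()

L_out = np.random.randn(output_dim)          # stand-in for the LSTM output at one step
w = np.random.randn(num_labels, output_dim)  # corresponds to C.parameter((num_labels, output_dim))
b = np.random.randn(num_labels)              # corresponds to C.parameter((num_labels))

z = w @ L_out + b                            # same algebra as C.times(w, L) + b
assert z.shape == (num_labels,)

# cross_entropy_with_softmax then compares softmax(z) against the one-hot label:
p = np.exp(z - z.max())
p /= p.sum()
y = np.zeros(num_labels)
y[2] = 1.                                    # hypothetical one-hot label
ce = -np.sum(y * np.log(p))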
Example #3
def seqcla():

    # LSTM params
    input_dim = 50
    output_dim = 128
    cell_dim = 128
    
    # model
    num_labels = 5
    vocab = 2000
    embed_dim = 50    

    t = C.dynamic_axis(name='t')
    # temporarily using cntk1's SparseInput; cntk2's Input() will simply take sparse as a parameter
    features = cntk1.SparseInput(vocab, dynamicAxis=t, name='features')    
    labels = C.input(num_labels, name='labels')
   
    train_reader = C.CNTKTextFormatReader(train_file)

    # setup embedding matrix
    embedding = C.parameter((embed_dim, vocab), learning_rate_multiplier=0.0, 
                             init_from_file_path=embedding_file)

    # get the vector representing the word
    sequence = C.times(embedding, features, name='sequence')
    
    # add an LSTM layer
    L = lstm_layer(output_dim, cell_dim, sequence, input_dim)
    
    # add a softmax layer on top
    w = C.parameter((num_labels, output_dim), name='w')
    b = C.parameter((num_labels), name='b')
    z = C.plus(C.times(w, L), b, name='z')
    z.tag = "output"
    
    # and reconcile the shared dynamic axis
    pred = C.reconcile_dynamic_axis(z, labels, name='pred')    
    
    ce = C.cross_entropy_with_softmax(labels, pred)
    ce.tag = "criterion"
    
    my_sgd = C.SGDParams(epoch_size=0, minibatch_size=10, learning_rates_per_mb=0.1, max_epochs=3)    
    
    with C.LocalExecutionContext('seqcla') as ctx:
        # train the model
        ctx.train(root_nodes=[ce], training_params=my_sgd, input_map=train_reader.map(
                  features, alias='x', dim=vocab, format='Sparse').map(
                  labels, alias='y', dim=num_labels, format='Dense'))        
        
        # write out the predictions
        ctx.write(input_map=train_reader.map(
                  features, alias='x', dim=vocab, format='Sparse').map(
                  labels, alias='y', dim=num_labels, format='Dense'))
                  
        # do some manual accuracy testing
        acc = calc_accuracy(train_file, ctx.output_filename_base)
        
        # and test for the same number...
        TOLERANCE_ABSOLUTE = 1E-02
        assert np.allclose(acc, 0.6006415396952687, atol=TOLERANCE_ABSOLUTE)
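The embedding lookup in both seqcla() variants is a matrix product against a one-hot (sparse) word vector; a short NumPy illustration of why C.times(embedding, features) selects the word's column (the word id and the random matrix are hypothetical):

import numpy as np

vocab, embed_dim = 2000, 50                    # same dimensions as in seqcla()

embedding = np.random.randn(embed_dim, vocab)  # corresponds to C.parameter((embed_dim, vocab))

word_id = 42                                   # hypothetical token id
one_hot = np.zeros(vocab)
one_hot[word_id] = 1.                          # sparse input for a single time step

word_vec = embedding @ one_hot                 # same algebra as C.times(embedding, features)
assert np.allclose(word_vec, embedding[:, word_id])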
Example #4
def test_op_slice_sequence(input_data, slice_params, expected_result,
                           device_id, precision):
    # Forward pass test
    # ==================
    # We compute the expected output for the forward pass.
    # The expected output needs two surrounding brackets:
    # the first for the sequence along the dynamic axis t,
    # the second for the batch of one sample.

    # 1 sample with 2 sequence elements, each a vector of 3

    t = C.dynamic_axis(name='t')
    a = I([input_data], dynamic_axis=t)

    # slice using the operator
    result = C.slice(a, slice_params[0], slice_params[1], axis='t')
    result = C.identity(result)  # required hack because Slice doesn't propagate tag

    unittest_helper(result,
                    None, [expected_result],
                    device_id=device_id,
                    precision=precision,
                    clean_up=False,
                    backward_pass=False)

    # Backward pass test
    # ==================
    # The gradient of the slice operator is a tensor of the same shape as the
    # input tensor, having 1 for elements that were taken and 0 for elements
    # that were dropped.

    def grad_slice(x, beg_index, end_index):
        res = np.zeros_like(x)
        res[beg_index:end_index] = 1
        return res

    expected_gradient = grad_slice(np.asarray(input_data), *slice_params)

    unittest_helper(result,
                    None, [expected_gradient],
                    device_id=device_id,
                    precision=precision,
                    clean_up=True,
                    backward_pass=True,
                    input_node=a)
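Both slice tests above receive their arguments from a parametrizer that is not shown. Below is a hedged sketch of how such cases might be supplied with pytest; the concrete tuples are hypothetical, and the exact bracketing of expected_result must follow the unittest_helper convention described in the comments:

import pytest

SLICE_SEQUENCE_CASES = [
    # Hypothetical case: one sequence of 2 elements, each a vector of 3,
    # sliced along 't' from index 0 to 1 (only the first element is kept).
    ([[1., 2., 3.], [4., 5., 6.]],   # input_data
     (0, 1),                         # slice_params: (begin_index, end_index)
     [[1., 2., 3.]]),                # expected_result (bracketing per unittest_helper)
]

@pytest.mark.parametrize("input_data, slice_params, expected_result",
                         SLICE_SEQUENCE_CASES)
def test_op_slice_sequence(input_data, slice_params, expected_result,
                           device_id, precision):
    ...  # body as in the examples above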