def test_op_slice_sequence(input_data, slice_params, expected_result, device_id, precision):
    # Forward pass test
    # ==================
    # We compute the expected output for the forward pass.
    # The input needs two surrounding brackets:
    # the first for the sequence along the dynamic axis 't',
    # the second for the batch of one sample.
    # Here: 1 sample with 2 sequence elements, each a vector of 3.
    t = C.dynamic_axis(name='t')
    a = I([input_data], dynamic_axis=t)

    # slice using the operator
    result = C.slice(a, slice_params[0], slice_params[1], axis='t')
    result = C.identity(result)  # required hack because Slice doesn't propagate the tag

    unittest_helper(result, None, [expected_result],
                    device_id=device_id, precision=precision,
                    clean_up=False, backward_pass=False)

    # Backward pass test
    # ==================
    # The gradient of the slice operator is a tensor of the same shape as the
    # input tensor, having 1 for elements that were taken and 0 for elements
    # that were dropped.
    def grad_slice(x, beg_index, end_index):
        res = np.zeros_like(x)
        res[beg_index:end_index] = 1
        return res

    expected_gradient = grad_slice(np.asarray(input_data), *slice_params)

    unittest_helper(result, None, [expected_gradient],
                    device_id=device_id, precision=precision,
                    clean_up=True, backward_pass=True, input_node=a)
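For intuition, here is a minimal NumPy-only sketch of the rule grad_slice encodes: the gradient of a slice is an indicator mask over the input, 1 where an element was kept and 0 where it was dropped. The data and slice bounds below are illustrative, not taken from any actual test parametrization.

import numpy as np

def grad_slice(x, beg_index, end_index):
    # indicator mask: 1 inside the slice, 0 outside
    res = np.zeros_like(x)
    res[beg_index:end_index] = 1
    return res

# illustrative input: a sequence of 2 elements, each a vector of 3
x = np.asarray([[1., 2., 3.],
                [4., 5., 6.]])

# slicing [0:1] along the sequence axis keeps only the first element ...
assert np.allclose(x[0:1], [[1., 2., 3.]])

# ... so the gradient w.r.t. the input is 1 for that row and 0 elsewhere
assert np.allclose(grad_slice(x, 0, 1), [[1., 1., 1.],
                                         [0., 0., 0.]])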
def seqcla():
    # LSTM params
    input_dim = 50
    output_dim = 128
    cell_dim = 128

    # model
    num_labels = 5
    vocab = 2000
    embed_dim = 50

    t = C.dynamic_axis(name='t')
    features = C.sparse_input(vocab, dynamic_axis=t, name='features')
    labels = C.input(num_labels, name='labels')

    train_reader = C.CNTKTextFormatReader(train_file)

    # setup embedding matrix
    embedding = C.parameter((embed_dim, vocab),
                            learning_rate_multiplier=0.0,
                            init_from_file_path=embedding_file)

    # get the vector representing the word
    sequence = C.times(embedding, features, name='sequence')

    # add an LSTM layer
    L = lstm_layer(output_dim, cell_dim, sequence, input_dim)

    # add a softmax layer on top
    w = C.parameter((num_labels, output_dim), name='w')
    b = C.parameter((num_labels), name='b')
    z = C.times(w, L) + b
    z.name = 'z'
    z.tag = "output"

    # and reconcile the shared dynamic axis
    pred = C.reconcile_dynamic_axis(z, labels, name='pred')

    ce = C.cross_entropy_with_softmax(labels, pred)
    ce.tag = "criterion"

    my_sgd = C.SGDParams(epoch_size=0, minibatch_size=10,
                         learning_rates_per_mb=0.1, max_epochs=3)

    with C.LocalExecutionContext('seqcla') as ctx:
        # train the model
        ctx.train(root_nodes=[ce], training_params=my_sgd,
                  input_map=train_reader.map(features, alias='x', dim=vocab, format='Sparse')
                                        .map(labels, alias='y', dim=num_labels, format='Dense'))

        # write out the predictions
        ctx.write(input_map=train_reader.map(features, alias='x', dim=vocab, format='Sparse')
                                        .map(labels, alias='y', dim=num_labels, format='Dense'))

        # do some manual accuracy testing
        acc = calc_accuracy(train_file, ctx.output_filename_base)

        # and test for the same number...
        TOLERANCE_ABSOLUTE = 1E-02
        assert np.allclose(acc, 0.6006415396952687, atol=TOLERANCE_ABSOLUTE)
def seqcla():
    # LSTM params
    input_dim = 50
    output_dim = 128
    cell_dim = 128

    # model
    num_labels = 5
    vocab = 2000
    embed_dim = 50

    t = C.dynamic_axis(name='t')
    # temporarily using cntk1 SparseInput because cntk2's Input() will simply allow sparse as a parameter
    features = cntk1.SparseInput(vocab, dynamicAxis=t, name='features')
    labels = C.input(num_labels, name='labels')

    train_reader = C.CNTKTextFormatReader(train_file)

    # setup embedding matrix
    embedding = C.parameter((embed_dim, vocab),
                            learning_rate_multiplier=0.0,
                            init_from_file_path=embedding_file)

    # get the vector representing the word
    sequence = C.times(embedding, features, name='sequence')

    # add an LSTM layer
    L = lstm_layer(output_dim, cell_dim, sequence, input_dim)

    # add a softmax layer on top
    w = C.parameter((num_labels, output_dim), name='w')
    b = C.parameter((num_labels), name='b')
    z = C.plus(C.times(w, L), b, name='z')
    z.tag = "output"

    # and reconcile the shared dynamic axis
    pred = C.reconcile_dynamic_axis(z, labels, name='pred')

    ce = C.cross_entropy_with_softmax(labels, pred)
    ce.tag = "criterion"

    my_sgd = C.SGDParams(epoch_size=0, minibatch_size=10,
                         learning_rates_per_mb=0.1, max_epochs=3)

    with C.LocalExecutionContext('seqcla') as ctx:
        # train the model
        ctx.train(root_nodes=[ce], training_params=my_sgd,
                  input_map=train_reader.map(features, alias='x', dim=vocab, format='Sparse')
                                        .map(labels, alias='y', dim=num_labels, format='Dense'))

        # write out the predictions
        ctx.write(input_map=train_reader.map(features, alias='x', dim=vocab, format='Sparse')
                                        .map(labels, alias='y', dim=num_labels, format='Dense'))

        # do some manual accuracy testing
        acc = calc_accuracy(train_file, ctx.output_filename_base)

        # and test for the same number...
        TOLERANCE_ABSOLUTE = 1E-02
        assert np.allclose(acc, 0.6006415396952687, atol=TOLERANCE_ABSOLUTE)
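For intuition about what the seqcla graph computes per word, here is a minimal NumPy-only sketch (toy dimensions, not the CNTK API): the embedding lookup C.times(embedding, features) reduces to a matrix product with a one-hot word vector, the layer on top is an affine map, and cross_entropy_with_softmax is the negative log of the softmax probability assigned to the true label.

import numpy as np

# toy dimensions (the script above uses vocab=2000, embed_dim=50, ...)
vocab, embed_dim, hidden_dim, num_labels = 6, 4, 3, 5
rng = np.random.default_rng(0)

# multiplying the (embed_dim x vocab) embedding matrix by a one-hot (sparse)
# word vector just selects the word's column
embedding = rng.standard_normal((embed_dim, vocab))
word_id = 2
one_hot = np.zeros(vocab)
one_hot[word_id] = 1.0
assert np.allclose(embedding @ one_hot, embedding[:, word_id])

# the layer on top: z = w * L + b, where L stands in for the LSTM output
L = rng.standard_normal(hidden_dim)
w = rng.standard_normal((num_labels, hidden_dim))
b = rng.standard_normal(num_labels)
z = w @ L + b

# cross_entropy_with_softmax(labels, z) = -sum(labels * log(softmax(z)))
labels = np.zeros(num_labels)
labels[3] = 1.0
log_softmax = z - np.log(np.sum(np.exp(z)))
ce = -np.sum(labels * log_softmax)
assert ce > 0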