def create_model(X):
    # Defines a few stacked GRUs followed by a scalar Dense output.
    l1 = Recurrence(step_function=GRU(shape=20))(X)
    l2 = Recurrence(step_function=GRU(shape=20))(l1)
    l3 = Dense(shape=1)(l2)
    return l3
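# Illustrative usage sketch (not part of the original snippet): applying create_model
# to a CNTK sequence input. Assumes `import cntk as C` plus the layer imports above;
# the 5-dimensional feature size is a hypothetical value.
features = C.sequence.input_variable(5)   # hypothetical per-step feature dimension
z = create_model(features)                # stacked GRUs, one scalar output per step
# z can then be paired with a criterion, e.g. C.squared_error(z, target), for training.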
def model(self, x):
    param1 = 500
    param2 = 250
    # Three dense layers followed by two bidirectional LSTM layers.
    x = Dense(param1, activation=cntk.tanh)(x)
    x = Dense(param1, activation=cntk.tanh)(x)
    x = Dense(param1, activation=cntk.tanh)(x)
    x = Sequential([(Recurrence(LSTM(param2)),
                     Recurrence(LSTM(param2), go_backwards=True)),
                    cntk.splice])(x)
    x = Sequential([(Recurrence(LSTM(param2)),
                     Recurrence(LSTM(param2), go_backwards=True)),
                    cntk.splice])(x)
    x = Dense(self.dim_y)(x)
    return x
def LSTM_sequence_classifier_net(input, num_output_classes, embedding_dim, LSTM_dim, cell_dim):
    lstm_classifier = Sequential([Embedding(embedding_dim),
                                  Recurrence(LSTM(LSTM_dim, cell_dim)),
                                  sequence.last,
                                  Dense(num_output_classes)])
    return lstm_classifier(input)
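# Illustrative usage sketch (assumption, not part of the original snippet): classifying
# variable-length sequences of one-hot tokens with the factory above. The vocabulary size
# and layer dimensions below are hypothetical; imports are assumed to match the snippet.
x = sequence.input_variable(2000, is_sparse=True)   # hypothetical vocabulary of 2000 tokens
z = LSTM_sequence_classifier_net(x, num_output_classes=5,
                                 embedding_dim=50, LSTM_dim=25, cell_dim=25)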
def create_model(input_sequence, label_sequence, vocab_dim, hidden_dim):
    # Create the rnn that computes the latent representation for the next token.
    rnn_with_latent_output = Sequential([
        C.layers.Embedding(hidden_dim),
        For(range(num_layers), lambda: Sequential([
            Stabilizer(),
            Recurrence(LSTM(hidden_dim), go_backwards=False)
        ])),
    ])

    # Apply it to the input sequence.
    latent_vector = rnn_with_latent_output(input_sequence)

    # Connect the latent output to (sampled/full) softmax.
    if use_sampled_softmax:
        weights = load_sampling_weights(token_frequencies_file_path)
        smoothed_weights = np.float32(np.power(weights, alpha))
        sampling_weights = C.reshape(C.Constant(smoothed_weights), shape=(1, vocab_dim))
        z, ce, errs = cross_entropy_with_sampled_softmax(
            latent_vector, label_sequence, vocab_dim, hidden_dim,
            softmax_sample_size, sampling_weights)
    else:
        z, ce, errs = cross_entropy_with_full_softmax(
            latent_vector, label_sequence, vocab_dim, hidden_dim)

    return z, ce, errs
def test_htk_deserializers():
    mbsize = 640
    epoch_size = 1000 * mbsize
    lr = [0.001]

    feature_dim = 33
    num_classes = 132
    context = 2

    os.chdir(data_path)

    features_file = "glob_0000.scp"
    labels_file = "glob_0000.mlf"
    label_mapping_file = "state.list"

    fd = HTKFeatureDeserializer(StreamDefs(
        amazing_features=StreamDef(shape=feature_dim,
                                   context=(context, context),
                                   scp=features_file)))
    ld = HTKMLFDeserializer(label_mapping_file, StreamDefs(
        awesome_labels=StreamDef(shape=num_classes, mlf=labels_file)))

    reader = MinibatchSource([fd, ld])

    features = C.input_variable(((2 * context + 1) * feature_dim))
    labels = C.input_variable((num_classes))

    model = Sequential([For(range(3), lambda: Recurrence(LSTM(256))),
                        Dense(num_classes)])
    z = model(features)
    ce = C.cross_entropy_with_softmax(z, labels)
    errs = C.classification_error(z, labels)

    learner = C.adam_sgd(z.parameters,
                         lr=C.learning_rate_schedule(lr, C.UnitType.sample, epoch_size),
                         momentum=C.momentum_as_time_constant_schedule(1000),
                         low_memory=True,
                         gradient_clipping_threshold_per_sample=15,
                         gradient_clipping_with_truncation=True)
    trainer = C.Trainer(z, (ce, errs), learner)

    input_map = {
        features: reader.streams.amazing_features,
        labels: reader.streams.awesome_labels
    }

    pp = C.ProgressPrinter(freq=0)
    # just run and verify it doesn't crash
    for i in range(3):
        mb_data = reader.next_minibatch(mbsize, input_map=input_map)
        trainer.train_minibatch(mb_data)
        pp.update_with_trainer(trainer, with_metric=True)
    assert True
    os.chdir(abs_path)
def test_ctc_encoder_train_and_network_output_to_labels():
    # test CTC encoder in training loop and CTCEncoder.network_output_to_labels
    a = C.sequence.input_variable(10)
    labels = ['a', 'b', 'c']
    encoder = CTCEncoder(labels)

    labels_tensor = C.sequence.input_variable(len(encoder.classes_))  # number of classes = 4
    input_tensor = C.sequence.input_variable(100)

    prediction_tensor = Dense(4)(Recurrence(LSTM(100))(C.ones_like(input_tensor)))

    labels_graph = C.labels_to_graph(labels_tensor)
    fb = C.forward_backward(labels_graph, prediction_tensor,
                            blankTokenId=encoder.blankTokenId)

    ground_truth = ['a', 'b', 'b', 'b', 'c']
    seq_length = 10  # must be the same length as the sequence length in network_out

    pred = np.array([
        [0., 2., 0., 0.],
        [0., 2., 0., 0.],
        [0., 0., 2., 0.],
        [2., 0., 0., 0.],
        [0., 0., 2., 0.],
        [2., 0., 0., 0.],
        [0., 0., 2., 0.],
        [2., 0., 0., 0.],
        [0., 0., 0., 2.],
        [0., 0., 0., 2.],
    ]).astype(np.float32)

    n = np.random.random((10, 100)).astype(np.float32)

    # result = fb.eval({labels_tensor: [encoder.transform(ground_truth, seq_length=seq_length)],
    #                   input_tensor: [n]})
    # print(result)

    adam = C.adam(prediction_tensor.parameters, 0.01, 0.912)
    trainer = C.Trainer(prediction_tensor, (fb,), [adam])

    for i in range(300):
        trainer.train_minibatch({
            labels_tensor: [encoder.transform(ground_truth, seq_length=seq_length)],
            input_tensor: [n]
        })
        # print(trainer.previous_minibatch_loss_average)

    result = prediction_tensor.eval({input_tensor: [n]})
    assert encoder.network_output_to_labels(result[0], squash_repeat=True) == ground_truth
def create_model(output_dim):
    return Sequential([
        For(range(num_layers), lambda: Sequential([
            Stabilizer(),
            Recurrence(LSTM(hidden_dim), go_backwards=False)
        ])),
        Dense(output_dim)
    ])
def create_model():
    '''
    Creates the model to train
    :return: Returns the last output of a sequential model using LSTMs
    '''
    return Sequential([
        For(range(NUMBER_LAYERS),
            lambda: Sequential([Recurrence(LSTM(HIDDEN_LAYER_DIMENSIONS))])),
        sequence.last,
        Dense(NUM_OUTPUT_CLASSES)
    ])
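# Illustrative usage sketch (assumption, not from the original snippet): the factory above
# reads NUMBER_LAYERS, HIDDEN_LAYER_DIMENSIONS and NUM_OUTPUT_CLASSES from the enclosing
# module; the values and the 20-dimensional input below are hypothetical examples.
NUMBER_LAYERS = 2
HIDDEN_LAYER_DIMENSIONS = 128
NUM_OUTPUT_CLASSES = 5
x_in = sequence.input_variable(20)      # hypothetical per-step feature dimension
z = create_model()(x_in)                # one (NUM_OUTPUT_CLASSES,) output per sequence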
def test_recurrence():
    inputAxis = Axis('inputAxis')
    stateAxis = Axis('stateAxis')
    InputSequence = SequenceOver[inputAxis]
    StateSequence = SequenceOver[stateAxis]

    # input and expected for both tests below
    x = np.reshape(np.arange(0, 25, dtype=np.float32), (1, 5, 5))
    exp = [[0.239151, 0.239151, 0.239151, 0.239151, 0.239151],
           [0.338713, 0.338713, 0.338713, 0.338713, 0.338713],
           [0.367456, 0.367456, 0.367456, 0.367456, 0.367456],
           [0.375577, 0.375577, 0.375577, 0.375577, 0.375577],
           [0.377891, 0.377891, 0.377891, 0.377891, 0.377891]]

    ####################################################
    # Test 1: Recurrence(): initial state is constant
    ####################################################
    # Note: We cannot use random init of the GRU parameters because random numbers will
    # depend on what previous tests were run. Hence, use a constant (which is not realistic).
    # TODO: Find out how to reset the random generator, then remove the constant init.
    R = Recurrence(GRU(5, init=0.05), go_backwards=False, initial_state=0.1)

    @Function
    @Signature(InputSequence[Tensor[5]])
    def F(x):
        return R(x)

    rt = F(x)
    np.testing.assert_array_almost_equal(
        rt[0], exp, decimal=6, err_msg='Error in Recurrence(GRU()) forward')

    ####################################################
    # Test 2: RecurrenceFrom(): initial state is data input
    ####################################################
    RF = RecurrenceFrom(GRU(5, init=0.05), go_backwards=False)

    @Function
    @Signature(s=StateSequence[Tensor[5]], x=InputSequence[Tensor[5]])
    def FF(s, x):
        return RF(s, x)

    # we pass the same value as the constant in the previous test to make the result the same
    s = np.ones((1, 5, 5)) * 0.1
    rt = FF(s, x)
    np.testing.assert_array_almost_equal(
        rt[0], exp, decimal=6, err_msg='Error in RecurrenceFrom(GRU()) forward')
def test_large_model_serialization_float(tmpdir):
    import os
    from cntk.layers import Recurrence, LSTM, Dense

    type_size = np.dtype(np.float32).itemsize
    two_gb = 2**31
    size = (2097152 + 4, 256, 512, 4096)
    assert size[0] * size[1] * type_size > two_gb

    device = C.device.cpu()
    i = C.sequence.input(size[0])
    w = C.Parameter((size[0], size[1]),
                    init=C.uniform(3.0, seed=12345),
                    device=device)
    e = C.times(i, w)

    h_fwd = Recurrence(LSTM(size[2]))(e)
    h_bwd = Recurrence(LSTM(size[2]), go_backwards=True)(e)

    h_last_fwd = C.sequence.last(h_fwd)
    h_first_bwd = C.sequence.first(h_bwd)
    t = C.splice(h_last_fwd, h_first_bwd)

    z1 = Dense(size[2], activation=C.relu)(t)
    z = Dense(2, activation=None)(z1)

    filename = str(tmpdir / 'test_large_model_serialization_float.out')
    z.save(filename)
    assert os.path.getsize(filename) > two_gb

    y = C.Function.load(filename, device=device)
    assert len(z.parameters) == len(y.parameters)
    for param_pair in zip(z.parameters, y.parameters):
        assert param_pair[0].shape == param_pair[1].shape
        assert np.allclose(param_pair[0].value, param_pair[1].value)
def __init__(self, n_in, n_out, init_lr, momentum):
    self.param1 = 512
    self.param2 = 256
    self.n_in = int(n_in)
    self.n_out = int(n_out)
    self.input = C.sequence.input_variable(shape=(self.n_in,))
    self.label = C.sequence.input_variable(shape=(self.n_out,))

    self.three_dnn = Sequential([
        Dense(self.param1, activation=C.tanh),
        Dense(self.param1, activation=C.tanh),
        Dense(self.param1, activation=C.tanh)
    ])
    self.rnn_layer1 = Sequential([(Recurrence(LSTM(self.param2)),
                                   Recurrence(LSTM(self.param2), go_backwards=True)),
                                  C.splice])
    self.rnn_layer2 = Sequential([(Recurrence(LSTM(self.param2)),
                                   Recurrence(LSTM(self.param2), go_backwards=True)),
                                  C.splice])
    self.final_dnn = Dense(self.n_out)

    self.output = self.model(self.input)
    self.loss = loss_fun(self.output, self.label)
    self.eval_err = loss_fun(self.output, self.label)

    self.lr_s = C.learning_rate_schedule(init_lr, C.UnitType.sample)
    self.mom_s = C.momentum_schedule(momentum)
    self.learner = C.momentum_sgd(self.output.parameters,
                                  lr=self.lr_s, momentum=self.mom_s)
    self.trainer = C.Trainer(self.output, (self.loss, self.eval_err),
                             [self.learner])
def create_recurrent_network():
    # Input variables denoting the features and label data
    features = sequence.input(((2 * context + 1) * feature_dim))
    labels = sequence.input((num_classes))

    # create network
    model = Sequential([For(range(3), lambda: Recurrence(LSTM(256))),
                        Dense(num_classes)])
    z = model(features)
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)

    return {
        'feature': features,
        'label': labels,
        'ce': ce,
        'errs': errs,
        'output': z
    }
def test_recurrent_block(block_type, block_outputs_count, block_size, W_mult, H_mult, expected_res):
    input_shape = 4

    sequenceAxis = Axis('sequenceAxis')

    y = input(input_shape, dynamic_axes=[Axis.default_batch_axis(), sequenceAxis])
    data = np.reshape(np.arange(0, 16, dtype=np.float32), (1, 4, 4))

    rnn_block = block_type(block_size, init=0.1)

    assert len(rnn_block.outputs) == block_outputs_count
    rnn_net = Recurrence(rnn_block)(y)

    assert rnn_net.b.shape == (W_mult * block_size,)
    assert rnn_net.W.shape == (input_shape, W_mult * block_size)
    assert rnn_net.H.shape == (block_size, H_mult * block_size)

    res = rnn_net.eval(data)
    expected = np.asarray(expected_res, dtype=np.float32)

    np.testing.assert_array_almost_equal(res[0], expected, decimal=6)
def test_recurrence_step_fun():
    import cntk as C

    def step_f(prev1, x):
        return prev1 * x
    rec = Recurrence(step_f)

    def step_f(prev1, prev2, x):
        return prev1 * prev2 * x, prev1 * x
    rec = Recurrence(step_f)

    def step_f(prev1, prev2, prev3, x):
        return prev1 * prev2 * prev3 * x, prev1 * x, prev2 * x
    rec = Recurrence(step_f)

    with pytest.raises(ValueError):
        def step_f(prev1, prev2, prev3, prev4, x):
            return prev1 * prev2 * prev3 * x, prev1 * x, prev2 * x, prev4 * x
        rec = Recurrence(step_f)

    with pytest.raises(TypeError):
        v = C.input_variable((1), name='additional_input_variable')
        step_f = lambda prev, x: prev * v * x
        rec = Recurrence(step_f)

    with pytest.raises(TypeError):
        def step_f(prev1, x):
            p = C.Parameter((1))
            return prev1 * x * p
        rec = Recurrence(step_f)
def bidirectional_recurrence(fwd, bwd):
    f = Recurrence(fwd)
    g = Recurrence(bwd, go_backwards=True)
    x = cntk.placeholder()
    return cntk.splice(f(x), g(x))
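# Illustrative usage sketch (assumption, not part of the original helper): building a
# bidirectional LSTM layer from the helper above and applying it to a sequence input.
# The layer sizes and input dimension are hypothetical; assumes `import cntk`.
birnn = bidirectional_recurrence(cntk.layers.LSTM(64), cntk.layers.LSTM(64))
x_seq = cntk.sequence.input_variable(10)   # hypothetical 10-dimensional input sequence
h = birnn(x_seq)                           # forward and backward outputs spliced: shape (128,)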
    X = np.eye(vocab_size, dtype=np.float32)[xi]
    Y = np.eye(vocab_size, dtype=np.float32)[yi]
    return [X], [Y]

get_sample(0)

input_sequence = sequence.input_variable(shape=vocab_size)
label_sequence = sequence.input_variable(shape=vocab_size)

model = Sequential([
    For(range(2), lambda: Sequential([Stabilizer(),
                                      Recurrence(LSTM(256), go_backwards=False)])),
    Dense(vocab_size)
])

z = model(input_sequence)
z_sm = cntk.softmax(z)

ce = cross_entropy_with_softmax(z, label_sequence)
errs = classification_error(z, label_sequence)

lr_per_sample = learning_rate_schedule(0.001, UnitType.sample)
momentum_time_constant = momentum_as_time_constant_schedule(1100)
clipping_threshold_per_sample = 5.0
gradient_clipping_with_truncation = True
learner = momentum_sgd(
    z.parameters,
    X = np.eye(vocab_size, dtype=np.float32)[xi]
    Y = np.eye(vocab_size, dtype=np.float32)[yi]
    return [X], [Y]

sample(0)

input_seq_axis = Axis('inputAxis')
input_sequence = sequence.input_variable(shape=vocab_size, sequence_axis=input_seq_axis)
label_sequence = sequence.input_variable(shape=vocab_size, sequence_axis=input_seq_axis)

# model = Sequential([Dense(300), Dense(vocab_size)])
model = Sequential([
    For(range(2), lambda: Sequential([Stabilizer(),
                                      Recurrence(LSTM(256), go_backwards=False)])),
    Dense(vocab_size)
])

z = model(input_sequence)
ce = cross_entropy_with_softmax(z, label_sequence)
errs = classification_error(z, label_sequence)

lr_per_sample = learning_rate_schedule(0.001, UnitType.sample)
momentum_time_constant = momentum_as_time_constant_schedule(1100)
clipping_threshold_per_sample = 5.0
gradient_clipping_with_truncation = True
learner = momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant,
                       gradient_clipping_threshold_per_sample=clipping_threshold_per_sample,
                       gradient_clipping_with_truncation=gradient_clipping_with_truncation)
progress_printer = ProgressPrinter(freq=100, tag='Training')
def array(vals):
    return np.array(vals, dtype=np.float32)

if __name__ == '__main__':
    # TODO: add all Layers tests here and use the correct pytest pattern

    # ----------------------------------------------
    # Recurrence() over regular function
    # ----------------------------------------------
    from cntk.layers import Recurrence
    from cntk.ops import plus
    from cntk.debugging import *

    r = Recurrence(plus)
    dump_function(r)
    r.update_signature(1)
    dump_function(r)

    data = [   # simple sequence
        array([[2], [6], [4], [8], [6]])
    ]
    #out = r(data)
    # BUGBUG: fails with "ValueError: Variable(Plus5_output) with unknown shape detected when compiling the Function graph!"
    #print(out)

    # ----------------------------------------------
    # sequential convolution without reduction dimension
    # ----------------------------------------------
    from cntk.layers import Convolution
        out = net(w_a)[0]
        if out[1] > 0.5 and l == -1:
            correct += 1
        if out[0] > 0.5 and l == 1:
            correct += 1
        total += 1
    print("{} out of {} correct ({}%)".format(correct, total, correct / total * 100))

z_sm = C.softmax(z)
check(z_sm)

# Now implement a simple RNN
words_arr1 = [to_onehot(list(map(char_to_num, list(w)))) for w in words]

input_var = sequence.input_variable(vocab_size)
label_var = C.input_variable(2)

model = Sequential([Recurrence(C.layers.RNNStep(200, activation=C.relu)),
                    sequence.last,
                    Dense(100, activation=C.relu),
                    Dense(2)])
z = model(input_var)
z_sm = C.softmax(z)

ce = cross_entropy_with_softmax(z, label_var)
errs = classification_error(z, label_var)

lr_per_sample = learning_rate_schedule(0.02, UnitType.minibatch)
learner = C.learners.sgd(z.parameters, lr_per_sample)
progress_printer = ProgressPrinter(freq=100, tag='Training')
trainer = Trainer(z, (ce, errs), learner, progress_printer)

log_number_of_parameters(z)

minibatch_size = 10