def create_model(input_sequence, label_sequence, vocab_dim, hidden_dim):
    """Build the recurrent network and attach a sampled or full softmax loss.

    The network is an embedding of size ``hidden_dim`` followed by
    ``num_layers`` stabilized forward LSTM recurrences. Its output (the latent
    representation for the next token) is handed to one of the two
    cross-entropy helpers, selected by the module-level flag
    ``use_sampled_softmax``.

    Besides its arguments, this function reads the module-level names
    ``num_layers``, ``use_sampled_softmax``, ``token_frequencies_file_path``,
    ``alpha`` and ``softmax_sample_size``.

    Args:
        input_sequence: Input the recurrent network is applied to.
        label_sequence: Labels forwarded to the cross-entropy helper.
        vocab_dim: Vocabulary size; also the width of the ``(1, vocab_dim)``
            sampling-weight constant in the sampled-softmax branch.
        hidden_dim: Size of the embedding and of every LSTM layer.

    Returns:
        The tuple ``(z, ce, errs)`` produced by the selected helper --
        presumably the model output, the cross-entropy criterion and the
        error metric; confirm against the helper definitions.
    """
    def make_recurrent_layer():
        # One stabilized, forward-running LSTM recurrence.
        return Sequential([
            Stabilizer(),
            Recurrence(LSTM(hidden_dim), go_backwards=False),
        ])

    # Network that computes the latent representation for the next token.
    latent_net = Sequential([
        C.Embedding(hidden_dim),
        For(range(num_layers), make_recurrent_layer),
    ])

    # Apply it to the input sequence.
    latent = latent_net(input_sequence)

    # Connect the latent output to the (sampled or full) softmax.
    if not use_sampled_softmax:
        z, ce, errs = cross_entropy_with_full_softmax(
            latent, label_sequence, vocab_dim, hidden_dim)
    else:
        # Raise the loaded token weights to the power `alpha` and store them
        # as a float32 constant row of shape (1, vocab_dim).
        raw_weights = load_sampling_weights(token_frequencies_file_path)
        smoothed = np.float32(np.power(raw_weights, alpha))
        sampling_weights = C.reshape(C.Constant(smoothed), shape=(1, vocab_dim))
        z, ce, errs = cross_entropy_with_sampled_softmax(
            latent, label_sequence, vocab_dim, hidden_dim,
            softmax_sample_size, sampling_weights)

    return z, ce, errs
def LSTM_layer(input, output_dim, recurrence_hook_h=past_value, recurrence_hook_c=past_value):
    """Wire an LSTM cell into a recurrence over ``input``.

    Args:
        input: Input to the LSTM cell; its dynamic axes are reused for the
            state placeholders.
        output_dim: Dimension passed to ``LSTM`` and used as the shape of the
            hidden-state and cell-state placeholders.
        recurrence_hook_h: Callable applied to the cell's hidden-state output
            to produce the recurrent hidden state (default ``past_value``,
            i.e. the previous value).
        recurrence_hook_c: Same as ``recurrence_hook_h`` but for the cell
            state.

    Returns:
        A pair ``(combine([h]), combine([c]))`` wrapping the hidden-state and
        cell-state outputs of the recurrent cell.
    """
    # Placeholders stand in for the hidden and cell state, which do not
    # exist yet at this point.
    axes = input.dynamic_axes
    prev_h = placeholder_variable(shape=output_dim, dynamic_axes=axes)
    prev_c = placeholder_variable(shape=output_dim, dynamic_axes=axes)

    # Build the cell function and apply it to the input and the placeholders.
    cell = LSTM(output_dim)
    step = cell(input, (prev_h, prev_c))
    step_outputs = step.outputs

    # Define the recurrence: each hook maps a cell output to the value that
    # is fed back in.
    fed_back_h = recurrence_hook_h(step_outputs[0])
    fed_back_c = recurrence_hook_c(step_outputs[1])

    # Close the loop by substituting the fed-back values for the placeholders.
    step.replace_placeholders({prev_h: fed_back_h.output, prev_c: fed_back_c.output})

    # Read the outputs again after the substitution and return each one as a
    # function via `combine`.
    hidden = step.outputs[0]
    cell_state = step.outputs[1]
    return combine([hidden]), combine([cell_state])
def test_htk_deserializers():
    """Smoke-test the HTK feature and MLF deserializers.

    Builds a ``MinibatchSource`` from an ``HTKFeatureDeserializer`` and an
    ``HTKMLFDeserializer``, then trains a model of three LSTM(256)
    recurrences plus a dense output layer for three minibatches. The test
    passes if nothing raises.

    The process working directory is switched to the module-level
    ``data_path`` for the duration of the test and is always switched to the
    module-level ``abs_path`` afterwards, even when the test body raises, so
    a failure here cannot leave later tests running in the wrong directory.
    """
    mbsize = 640
    epoch_size = 1000 * mbsize
    lr = [0.001]

    feature_dim = 33
    num_classes = 132
    context = 2

    # Relative file names -- presumably resolved against data_path, which is
    # why the working directory is changed below.
    features_file = "glob_0000.scp"
    labels_file = "glob_0000.mlf"
    label_mapping_file = "state.list"

    os.chdir(data_path)
    try:
        fd = HTKFeatureDeserializer(
            StreamDefs(amazing_features=StreamDef(
                shape=feature_dim,
                context=(context, context),
                scp=features_file)))

        ld = HTKMLFDeserializer(
            label_mapping_file,
            StreamDefs(
                awesome_labels=StreamDef(shape=num_classes, mlf=labels_file)))

        reader = MinibatchSource([fd, ld])

        # Input width is the centre frame plus `context` frames on each side.
        features = C.input_variable(((2 * context + 1) * feature_dim))
        labels = C.input_variable((num_classes))

        model = Sequential(
            [For(range(3), lambda: Recurrence(LSTM(256))),
             Dense(num_classes)])
        z = model(features)
        ce = C.cross_entropy_with_softmax(z, labels)
        errs = C.classification_error(z, labels)

        learner = C.adam_sgd(
            z.parameters,
            lr=C.learning_rate_schedule(lr, C.UnitType.sample, epoch_size),
            momentum=C.momentum_as_time_constant_schedule(1000),
            low_memory=True,
            gradient_clipping_threshold_per_sample=15,
            gradient_clipping_with_truncation=True)
        trainer = C.Trainer(z, (ce, errs), learner)

        input_map = {
            features: reader.streams.amazing_features,
            labels: reader.streams.awesome_labels
        }

        pp = C.ProgressPrinter(freq=0)
        # Just run and verify it doesn't crash.
        for _ in range(3):
            mb_data = reader.next_minibatch(mbsize, input_map=input_map)
            trainer.train_minibatch(mb_data)
            pp.update_with_trainer(trainer, with_metric=True)
    finally:
        # Switch the working directory back regardless of the test outcome.
        os.chdir(abs_path)
def create_model(output_dim):
    """Build a stack of stabilized LSTM recurrences topped by a dense layer.

    The number of recurrent layers and their size come from the module-level
    names ``num_layers`` and ``hidden_dim``.

    Args:
        output_dim: Output dimension of the final ``Dense`` layer.

    Returns:
        A ``Sequential`` composed of the recurrent stack followed by the
        dense layer.
    """
    def make_recurrent_layer():
        # One stabilized, forward-running LSTM recurrence.
        return Sequential([
            Stabilizer(),
            Recurrence(LSTM(hidden_dim), go_backwards=False),
        ])

    recurrent_stack = LayerStack(num_layers, make_recurrent_layer)
    output_layer = Dense(output_dim)
    return Sequential([recurrent_stack, output_layer])