def test_ctc_class_batch_to_labeling_batch():
    """The labeling batch built from a class batch has shape
    (time, batch, labels)."""
    length, batches, classes, labels = 20, 4, 2, 2

    # Symbolic inputs: per-frame class scores, frame mask, and target labels.
    y_hat = T.tensor3()
    y_hat_mask = T.matrix('features_mask')
    y = T.lmatrix('phonemes')
    y_labeling = ctc_cost._class_batch_to_labeling_batch(y, y_hat, y_hat_mask)

    # Concrete values: all-zero scores/labels, with the last five frames
    # masked out for every sequence in the batch.
    Y_hat = np.zeros((length, batches, classes + 1), dtype=floatX)
    Y = np.zeros((2, batches), dtype='int64')
    Y_hat_mask = np.ones((length, batches), dtype=floatX)
    Y_hat_mask[-5:] = 0

    Y_labeling = y_labeling.eval({y_hat: Y_hat, y: Y, y_hat_mask: Y_hat_mask})
    assert Y_labeling.shape == (length, batches, labels)
def test_ctc_log_forward_backward_duplicate():
    """Smoke-test the log-space forward/backward CTC recursions.

    NOTE(review): this function was originally defined twice under the
    same name ``test_ctc_log_forward_backward``; the later definition
    shadowed this one, so pytest never collected or ran this copy.
    Renamed so both are collected — the two bodies are identical, so if
    the duplication was accidental, delete one of them.
    """
    LENGTH = 8
    BATCHES = 4
    CLASSES = 2
    N_LABELS = 3

    # Symbolic graph: blank-augmented targets feed the forward/backward pass.
    y_hat = T.tensor3('features')
    input_mask = T.matrix('features_mask')
    y_hat_mask = input_mask
    y = T.lmatrix('phonemes')
    y_mask = T.matrix('phonemes_mask')
    blanked_y, blanked_y_mask = ctc_cost._add_blanks(
        y=y, blank_symbol=1, y_mask=y_mask)
    f, b = ctc_cost._log_forward_backward(
        blanked_y, y_hat, blanked_y_mask, y_hat_mask, CLASSES)

    # Concrete inputs: a mostly-uniform distribution with one frame (t=3)
    # favouring class 1; last two frames masked; first sequence's last two
    # labels masked.
    Y_hat = np.zeros((LENGTH, BATCHES, CLASSES + 1), dtype=floatX)
    Y_hat[:, :, 0] = .7
    Y_hat[:, :, 1] = .2
    Y_hat[:, :, 2] = .1
    Y_hat[3, :, 0] = .3
    Y_hat[3, :, 1] = .4
    Y_hat[3, :, 2] = .3
    Y = np.zeros((N_LABELS, BATCHES), dtype='int64')
    Y_hat_mask = np.ones((LENGTH, BATCHES), dtype=floatX)
    Y_hat_mask[-2:] = 0
    Y_mask = np.asarray(np.ones_like(Y), dtype=floatX)
    Y_mask[-2:, 0] = 0

    y_prob = ctc_cost._class_batch_to_labeling_batch(
        blanked_y, y_hat, y_hat_mask)
    forward_probs = f.eval({y_hat: Y_hat, y: Y,
                            y_hat_mask: Y_hat_mask, y_mask: Y_mask})
    backward_probs = b.eval({y_hat: Y_hat, y: Y,
                             y_hat_mask: Y_hat_mask, y_mask: Y_mask})
    y_probs = y_prob.eval({y_hat: Y_hat, y: Y, y_hat_mask: Y_hat_mask})

    # The first sequence must have at least one finite forward+backward entry.
    assert not ((forward_probs + backward_probs)[:, 0, :] == -np.inf).all()
    # Marginals / per-frame likelihood computed for inspection; not asserted.
    marg = forward_probs + backward_probs - np.log(y_probs)
    forward_probs = np.exp(forward_probs)
    backward_probs = np.exp(backward_probs)
    L = (forward_probs * backward_probs[::-1][:, :, ::-1] / y_probs).sum(2)
    assert not np.isnan(forward_probs).any()
def test_ctc_log_forward_backward():
    """The log-space forward/backward recursions yield finite, NaN-free
    probabilities on a small masked batch."""
    length, batches, classes, n_labels = 8, 4, 2, 3

    # Symbolic graph: blanks are interleaved into the targets before the
    # forward/backward pass is built.
    y_hat = T.tensor3('features')
    input_mask = T.matrix('features_mask')
    y_hat_mask = input_mask
    y = T.lmatrix('phonemes')
    y_mask = T.matrix('phonemes_mask')
    blanked_y, blanked_y_mask = ctc_cost._add_blanks(
        y=y, blank_symbol=1, y_mask=y_mask)
    f, b = ctc_cost._log_forward_backward(
        blanked_y, y_hat, blanked_y_mask, y_hat_mask, classes)
    y_prob = ctc_cost._class_batch_to_labeling_batch(
        blanked_y, y_hat, y_hat_mask)

    # Concrete inputs: near-uniform scores, one frame (t=3) favouring
    # class 1, last two frames masked, and the first sequence's last two
    # labels masked.
    Y_hat = np.zeros((length, batches, classes + 1), dtype=floatX)
    Y_hat[:, :, 0] = .7
    Y_hat[:, :, 1] = .2
    Y_hat[:, :, 2] = .1
    Y_hat[3, :, 0] = .3
    Y_hat[3, :, 1] = .4
    Y_hat[3, :, 2] = .3
    Y = np.zeros((n_labels, batches), dtype='int64')
    Y_hat_mask = np.ones((length, batches), dtype=floatX)
    Y_hat_mask[-2:] = 0
    Y_mask = np.asarray(np.ones_like(Y), dtype=floatX)
    Y_mask[-2:, 0] = 0

    givens = {y_hat: Y_hat, y: Y, y_hat_mask: Y_hat_mask, y_mask: Y_mask}
    log_alpha = f.eval(givens)
    log_beta = b.eval(givens)
    y_probs = y_prob.eval({y_hat: Y_hat, y: Y, y_hat_mask: Y_hat_mask})

    # At least one finite entry for the first sequence.
    assert not ((log_alpha + log_beta)[:, 0, :] == -np.inf).all()
    # Marginals and per-frame likelihood; computed but not asserted,
    # kept for parity with the original test.
    marg = log_alpha + log_beta - np.log(y_probs)
    alpha = np.exp(log_alpha)
    beta = np.exp(log_beta)
    L = (alpha * beta[::-1][:, :, ::-1] / y_probs).sum(2)
    assert not np.isnan(alpha).any()