def assert_output_gate_memory_state_convolution_weights_are_equal(multi_directional_mdlstm,
                                                                  one_directional_mdlstm,
                                                                  direction_index: int):
    multi_directional_mdlstm_output_gate_memory_state_convolution = \
        multi_directional_mdlstm.mdlstm_parameters.output_gate_memory_state_convolution
    out_channels_size = multi_directional_mdlstm_output_gate_memory_state_convolution.weight.size(0)
    out_channels_per_direction = out_channels_size / 4
    start_index = int(out_channels_per_direction * direction_index)
    end_index = int(out_channels_per_direction * (direction_index + 1))
    multi_directional_mdlstm_output_gate_memory_state_convolution_weight_for_direction = \
        multi_directional_mdlstm_output_gate_memory_state_convolution.weight[start_index:end_index, :, :]
    multi_directional_mdlstm_output_gate_memory_state_convolution_bias_for_direction = \
        multi_directional_mdlstm_output_gate_memory_state_convolution.bias[start_index:end_index]
    one_directional_mdlstm_output_gate_memory_state_convolution = \
        one_directional_mdlstm.mdlstm_parameters.output_gate_memory_state_convolution
    if not TensorUtils.tensors_are_equal(
            multi_directional_mdlstm_output_gate_memory_state_convolution_weight_for_direction,
            one_directional_mdlstm_output_gate_memory_state_convolution.weight):
        raise RuntimeError("Error: the weight matrices for the output gate memory state convolution for " +
                           "the multi-directional MDLSTM and the corresponding one-directional MDLSTM " +
                           "are not the same")
    if not TensorUtils.tensors_are_equal(
            multi_directional_mdlstm_output_gate_memory_state_convolution_bias_for_direction,
            one_directional_mdlstm_output_gate_memory_state_convolution.bias):
        raise RuntimeError("Error: the bias vectors for the output gate memory state convolution for " +
                           "the multi-directional MDLSTM and the corresponding one-directional MDLSTM " +
                           "are not the same")
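# The checks above and below all rely on TensorUtils.tensors_are_equal. For reference,
# a minimal sketch of what that helper is assumed to do here, namely an exact
# element-wise comparison via torch.equal; the actual utility may differ, for example
# by using an epsilon tolerance. The function name below is made up for this sketch.
def tensors_are_equal_sketch(tensor_one: torch.Tensor, tensor_two: torch.Tensor) -> bool:
    # torch.equal returns True only if both tensors have the same size and
    # all corresponding elements match exactly
    return tensor_one.size() == tensor_two.size() and torch.equal(tensor_one, tensor_two)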
def assert_input_convolution_weights_are_equal(multi_directional_mdlstm,
                                               one_directional_mdlstm,
                                               direction_index: int):
    multi_directional_mdlstm_parallel_input_column_computation = \
        multi_directional_mdlstm.mdlstm_parameters.parallel_input_column_computation
    out_channels_size = multi_directional_mdlstm_parallel_input_column_computation.\
        parallel_convolution.weight.size(0)
    out_channels_per_direction = int(out_channels_size / 4)
    print("out_channels_per_direction: " + str(out_channels_per_direction))
    start_index = out_channels_per_direction * direction_index
    end_index = out_channels_per_direction * (direction_index + 1)
    print("start_index: " + str(start_index))
    print("end_index: " + str(end_index))
    multi_directional_mdlstm_weight_for_direction = \
        multi_directional_mdlstm_parallel_input_column_computation.\
        parallel_convolution.weight[start_index:end_index, :, :].unsqueeze(2)
    multi_directional_mdlstm_bias_for_direction = \
        multi_directional_mdlstm_parallel_input_column_computation.\
        parallel_convolution.bias[start_index:end_index]
    one_directional_mdlstm_input_convolution_computation = \
        one_directional_mdlstm.mdlstm_parameters.parallel_multiple_input_convolutions_computation
    print("multi_directional_mdlstm_weight_for_direction.size(): " +
          str(multi_directional_mdlstm_weight_for_direction.size()))
    print("one_directional_mdlstm_input_convolution_computation.parallel_convolution.weight.size(): " +
          str(one_directional_mdlstm_input_convolution_computation.parallel_convolution.weight.size()))
    if not TensorUtils.tensors_are_equal(
            multi_directional_mdlstm_weight_for_direction,
            one_directional_mdlstm_input_convolution_computation.parallel_convolution.weight):
        raise RuntimeError("Error: for direction " + str(direction_index) +
                           " the weight matrices for the input convolution computation for the " +
                           "multi-directional MDLSTM " +
                           str(multi_directional_mdlstm_weight_for_direction) +
                           " and the corresponding one-directional MDLSTM " +
                           str(one_directional_mdlstm_input_convolution_computation.parallel_convolution.weight) +
                           " are not the same")
    if not TensorUtils.tensors_are_equal(
            multi_directional_mdlstm_bias_for_direction,
            one_directional_mdlstm_input_convolution_computation.parallel_convolution.bias):
        raise RuntimeError("Error: the bias vectors for the input convolution computation for the " +
                           "multi-directional MDLSTM and the corresponding one-directional MDLSTM " +
                           "are not the same")
def check_dechunking_chunked_tensor_list_recovers_original(tensor_list_chunking,
                                                           original_tensor_list,
                                                           input_chunked):
    input_dechunked = tensor_list_chunking.dechunk_block_tensor_concatenated_along_batch_dimension(
        input_chunked)
    if not TensorUtils.tensors_lists_are_equal(original_tensor_list, input_dechunked):
        for index in range(0, len(original_tensor_list)):
            print("original[" + str(index) + "].size(): " + str(original_tensor_list[index].size()))
        for index in range(0, len(input_dechunked)):
            print("input_dechunked[" + str(index) + "].size(): " + str(input_dechunked[index].size()))
        TensorUtils.find_equal_slices_over_batch_dimension(input_chunked)
        raise RuntimeError("Error: the original tensor list and the de-chunked " +
                           "chunked tensor list are not the same")
def assert_results_are_same_with_and_without_packing(activations_example_without_packing,
                                                     activations_example_with_packing):
    if not TensorUtils.tensors_are_equal(activations_example_without_packing,
                                         activations_example_with_packing):
        raise RuntimeError("Error: expected the same activations for the MDLSTM forward computation " +
                           "with and without packing, but got different results:\n - without packing:\n" +
                           str(activations_example_without_packing) +
                           "\n - with packing:\n" +
                           str(activations_example_with_packing))
def test_ctc_loss_probabilities_match_labels_third_baidu_example_variant_two_extra_padding_wrong_side():
    ctc_loss = warpctc_pytorch.CTCLoss()
    print("expected shape of seqLength x batchSize x alphabet_size")
    # https://stackoverflow.com/questions/48915810/pytorch-contiguous
    probs = torch.FloatTensor(
        [
            [[0, 0, 0, 0, 0], [0, 0, 0, 0, 0]],  # Extra padding is added at the top, which is wrong
            [[0, 0, 0, 0, 0], [1, 2, 3, 4, 5]],
            [[0, 0, 0, 0, 0], [6, 7, 8, 9, 10]],
            [[0, 0, 0, 0, 0], [11, 12, 13, 14, 15]],
        ]
    )  # .contiguous()  # contiguous is just for performance, it does not change the results
    print("probs.size(): " + str(probs.size()))
    # labels = Variable(torch.IntTensor([[1, 0], [3, 3], [2, 3]]))
    # See: https://github.com/SeanNaren/warp-ctc/issues/29
    # IMPORTANT!!!: All label sequences are concatenated, without blanks/padding,
    # and label_sizes lists the sizes without padding
    labels = Variable(torch.IntTensor([1, 3, 3]))
    # Label sizes should be equal to the number of labels per example. Because the
    # labels are concatenated, the label sizes essentially instruct where the
    # sequence boundaries are!
    label_sizes = Variable(torch.IntTensor([1, 2]))
    # probs_sizes instructs on the number of real probabilities, distinguishing
    # real probabilities from padding. Padding should presumably
    # (looking at https://github.com/baidu-research/warp-ctc/blob/master/torch_binding/TUTORIAL.md)
    # be at the bottom, but this should be checked
    probs_sizes = Variable(torch.IntTensor([1, 3]))
    probs = Variable(probs, requires_grad=True)  # tells autograd to compute gradients for probs
    optimizer = optim.SGD(list([probs]), lr=0.001)
    print("probs: " + str(probs))
    cost = ctc_loss(probs, labels, probs_sizes, label_sizes)
    print("cost: " + str(cost))
    # Since the padding has been added to the wrong side (top instead of bottom),
    # the result is now expected to differ from the cost obtained with correct padding
    no_longer_expected_cost_tensor = torch.FloatTensor([8.965181350708008])
    print("no_longer_expected_cost_tensor: " + str(no_longer_expected_cost_tensor))
    if TensorUtils.tensors_are_equal(no_longer_expected_cost_tensor, cost):
        raise RuntimeError("Error: cost expected to be not equal to " +
                           str(no_longer_expected_cost_tensor) +
                           " but was: " + str(float(cost)))
    cost.backward()
    print("cost: " + str(cost))
    print("update probabilities...")
    optimizer.step()
    print("probs: " + str(probs))
    print(">>> Success: test_ctc_loss_probabilities_match_labels_third_baidu_example_variant_two_extra_padding_wrong_side")
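# An illustration of the warp-ctc label-concatenation convention described in the
# comments above (a minimal sketch; the function name is made up for this example).
# The two label sequences of the test, [1] and [3, 3], are concatenated into one
# flat tensor, and label_sizes marks where the per-example boundaries lie.
def demonstrate_warp_ctc_label_concatenation():
    sequence_one = [1]       # labels for the first example
    sequence_two = [3, 3]    # labels for the second example
    concatenated_labels = Variable(torch.IntTensor(sequence_one + sequence_two))
    label_sizes = Variable(torch.IntTensor([len(sequence_one), len(sequence_two)]))
    print("concatenated labels: " + str(concatenated_labels))  # 1, 3, 3
    print("label sizes: " + str(label_sizes))                  # 1, 2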
def check_activation_rows_are_not_equal(activation_rows):
    # For debugging
    # print("activation rows sizes after splitting: ")
    last_activation_row = activation_rows[0]
    for activation_row in activation_rows[1:]:
        # print(str(activation_row.size()))
        if TensorUtils.tensors_are_equal(last_activation_row, activation_row):
            print(">>> WARNING: activation rows are equal")
        # Compare each row to its predecessor, not just to the first row
        last_activation_row = activation_row
def assert_input_convolution_weights_are_equal(multi_directional_mdlstm,
                                               one_directional_mdlstm,
                                               direction_index: int):
    multi_directional_mdlstm_input_convolution_computation = \
        multi_directional_mdlstm.mdlstm_parameters.\
        parallel_multiple_input_convolutions_computations[direction_index]

    # out_channels_size = multi_directional_mdlstm_input_convolution_computation.\
    #     parallel_convolution.weight.size(0)
    # out_channels_per_direction = out_channels_size / 4
    # start_index = int(out_channels_per_direction * direction_index)
    # end_index = int(out_channels_per_direction * (direction_index + 1))
    #
    # one_directional_weight_from_multi_directional = \
    #     multi_directional_mdlstm_input_convolution_computation.\
    #     parallel_convolution.weight[start_index:end_index, :, :, :]
    # one_directional_bias_from_multi_directional = \
    #     multi_directional_mdlstm_input_convolution_computation.\
    #     parallel_convolution.bias[start_index:end_index]

    one_directional_mdlstm_input_convolution_computation = \
        one_directional_mdlstm.mdlstm_parameters.parallel_multiple_input_convolutions_computation
    if not TensorUtils.tensors_are_equal(
            multi_directional_mdlstm_input_convolution_computation.parallel_convolution.weight,
            one_directional_mdlstm_input_convolution_computation.parallel_convolution.weight):
        raise RuntimeError("Error: the weight matrices for the input convolution computation for the " +
                           "multi-directional MDLSTM and the corresponding one-directional MDLSTM " +
                           "are not the same")
    if not TensorUtils.tensors_are_equal(
            multi_directional_mdlstm_input_convolution_computation.parallel_convolution.bias,
            one_directional_mdlstm_input_convolution_computation.parallel_convolution.bias):
        raise RuntimeError("Error: the bias vectors for the input convolution computation for the " +
                           "multi-directional MDLSTM and the corresponding one-directional MDLSTM " +
                           "are not the same")
def test_multi_directional_mdlstm_produces_same_results_as_extracted_one_directional_mdlstms(test_tensor):
    multi_directional_mdlstm_test = MultiDirectionalMDLSTMTest.create_multi_directional_mdlstm_test()
    multi_directional_mdlstm_test.multi_directional_mdlstm = \
        multi_directional_mdlstm_test.multi_directional_mdlstm.cuda()
    one_directional_mdlstms = multi_directional_mdlstm_test.multi_directional_mdlstm.\
        create_one_directional_mdlstms_from_multi_directional_mdlstm()
    activations_multi_directional_mdlstm = \
        multi_directional_mdlstm_test.multi_directional_mdlstm(list([test_tensor]))
    print("activations_multi_directional_mdlstm: " + str(activations_multi_directional_mdlstm))
    assert len(activations_multi_directional_mdlstm) == 1
    activations_for_tensor = activations_multi_directional_mdlstm[0]
    print("activations_for_tensor.size(): " + str(activations_for_tensor.size()))

    # if not activations_for_tensor.size(0) == 4:
    #     raise RuntimeError("Error: expected the output tensor to have a size of 4 " +
    #                        "for its first dimension, i.e. for 4-directional MDLSTM")

    tensor_flipping_list = MDLSTMExamplesPacking.create_four_directions_tensor_flippings()
    for direction_index, tensor_flipping in enumerate(tensor_flipping_list):
        print(">>> direction_index: " + str(direction_index))
        one_directional_mdlstm = one_directional_mdlstms[direction_index].cuda()

        MultiDirectionalMDLSTMTest.assert_input_convolution_weights_are_equal(
            multi_directional_mdlstm_test.multi_directional_mdlstm, one_directional_mdlstm,
            direction_index)
        MultiDirectionalMDLSTMTest.assert_output_gate_memory_state_convolution_weights_are_equal(
            multi_directional_mdlstm_test.multi_directional_mdlstm, one_directional_mdlstm,
            direction_index)
        MultiDirectionalMDLSTMTest.assert_parallel_hidden_and_memory_state_column_computation_weights_are_equal(
            multi_directional_mdlstm_test.multi_directional_mdlstm, one_directional_mdlstm,
            direction_index)

        test_tensor_flipped = tensor_flipping.flip(test_tensor).cuda()
        activations_one_directional_mdlstm_flipped = one_directional_mdlstm(list([test_tensor_flipped]))
        # Flip the activations back to the original orientation
        activations_one_directional_mdlstm = \
            tensor_flipping.flip(activations_one_directional_mdlstm_flipped[0])

        start_index = int(direction_index * (activations_for_tensor.size(1) / 4))
        end_index = int((direction_index + 1) * (activations_for_tensor.size(1) / 4))
        print("start_index: " + str(start_index))
        print("end_index: " + str(end_index))
        # The activations are concatenated along the channel dimension
        activations_one_directional_mdlstm_from_four_directional_mdlstm = \
            activations_for_tensor[:, start_index:end_index, :, :]
        print("activations_one_directional_mdlstm_from_four_directional_mdlstm: " +
              str(activations_one_directional_mdlstm_from_four_directional_mdlstm))
        print("activations_one_directional_mdlstm: " + str(activations_one_directional_mdlstm))
        if not TensorUtils.tensors_are_equal(
                activations_one_directional_mdlstm,
                activations_one_directional_mdlstm_from_four_directional_mdlstm):
            raise RuntimeError("Error: expected the activation tensors for the one-directional MDLSTM:\n" +
                               str(activations_one_directional_mdlstm) +
                               "\nand the corresponding ones of the 4-directional MDLSTM:\n" +
                               str(activations_one_directional_mdlstm_from_four_directional_mdlstm) +
                               "\nto be the same.")
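# A toy illustration of the channel-dimension slicing used in the test above (a
# sketch with made-up sizes): the four-directional activations are assumed to be
# concatenated along dimension 1 (channels), so direction d occupies the channel
# range [d * channels_per_direction, (d + 1) * channels_per_direction).
def demonstrate_direction_slicing_along_channel_dimension():
    channels_per_direction = 2
    # Simulate per-direction activation tensors of size
    # batch_size=1 x channels=2 x height=3 x width=4, filled with the direction index
    per_direction_activations = [torch.full((1, channels_per_direction, 3, 4), float(d))
                                 for d in range(4)]
    concatenated = torch.cat(per_direction_activations, dim=1)  # size: 1 x 8 x 3 x 4
    for direction_index in range(4):
        start_index = direction_index * channels_per_direction
        end_index = (direction_index + 1) * channels_per_direction
        direction_slice = concatenated[:, start_index:end_index, :, :]
        # Each slice recovers the tensor for its direction exactly
        assert torch.equal(direction_slice, per_direction_activations[direction_index])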
def test_ctc_loss_probabilities_match_labels_third_baidu_example_variant():
    ctc_loss = warpctc_pytorch.CTCLoss()
    print("expected shape of seqLength x batchSize x alphabet_size")
    probs = torch.FloatTensor([[[1, 2, 3, 4, 5], [0, 0, 0, 0, 0], [-5, -4, -3, -2, -1]],
                               [[6, 7, 8, 9, 10], [0, 0, 0, 0, 0], [-10, -9, -8, -7, -6]],
                               [[11, 12, 13, 14, 15], [0, 0, 0, 0, 0],
                                [-15, -14, -13, -12, -11]]]).contiguous()
    # probs = torch.FloatTensor([
    #     [[-5, -4, -3, -2, -1], [-10, -9, -8, -7, -6], [-15, -14, -13, -12, -11]]
    # ]).transpose(0, 1).contiguous()
    print("probs.size(): " + str(probs.size()))
    # labels = Variable(torch.IntTensor([[1, 0], [3, 3], [2, 3]]))
    # See: https://github.com/SeanNaren/warp-ctc/issues/29
    # All label sequences are concatenated, without blanks/padding,
    # and label_sizes lists the sizes without padding
    labels = Variable(torch.IntTensor([3, 3, 1, 2, 3]))
    # labels = Variable(torch.IntTensor([2, 3]))
    # labels = Variable(torch.IntTensor([3, 3]))
    # Label sizes should be equal to the number of labels per example
    label_sizes = Variable(torch.IntTensor([2, 1, 2]))
    # label_sizes = Variable(torch.IntTensor([2]))
    # This one must be equal to the number of probabilities to avoid a crash
    probs_sizes = Variable(torch.IntTensor([3, 1, 3]))
    # probs_sizes = Variable(torch.IntTensor([3]))
    probs = Variable(probs, requires_grad=True)  # tells autograd to compute gradients for probs
    optimizer = optim.SGD(list([probs]), lr=0.001)
    print("probs: " + str(probs))
    cost = ctc_loss(probs, labels, probs_sizes, label_sizes)
    print("cost: " + str(cost))
    expected_cost_tensor = torch.FloatTensor([13.904030799865723])
    print("expected_cost_tensor: " + str(expected_cost_tensor))
    if not TensorUtils.tensors_are_equal(expected_cost_tensor, cost):
        raise RuntimeError("Error: cost expected to be " + str(expected_cost_tensor) +
                           " but was: " + str(float(cost)))
    cost.backward()
    print("cost: " + str(cost))
    print("update probabilities...")
    optimizer.step()
    print("probs: " + str(probs))
    print(">>> Success: test_ctc_loss_probabilities_match_labels_third_baidu_example_variant")
def assert_parallel_hidden_and_memory_state_column_computation_weights_are_equal(
        multi_directional_mdlstm, one_directional_mdlstm, direction_index: int):
    multi_directional_mdlstm_parallel_hidden_and_memory_state_column_computation = \
        multi_directional_mdlstm.mdlstm_parameters.parallel_hidden_and_memory_state_column_computation
    out_channels_size = \
        multi_directional_mdlstm_parallel_hidden_and_memory_state_column_computation.\
        get_paired_input_weightings_output_size()
    out_channels_per_direction = out_channels_size / 4
    start_index = int(out_channels_per_direction * direction_index)
    end_index = int(out_channels_per_direction * (direction_index + 1))
    multi_directional_mdlstm_weight_for_direction = \
        multi_directional_mdlstm_parallel_hidden_and_memory_state_column_computation.\
        parallel_convolution.weight[start_index:end_index, :, :]
    multi_directional_mdlstm_bias_for_direction = \
        multi_directional_mdlstm_parallel_hidden_and_memory_state_column_computation.\
        parallel_convolution.bias[start_index:end_index]
    one_directional_mdlstm_parallel_hidden_and_memory_state_column_computation = \
        one_directional_mdlstm.mdlstm_parameters.parallel_hidden_and_memory_state_column_computation
    if not TensorUtils.tensors_are_equal(
            multi_directional_mdlstm_weight_for_direction,
            one_directional_mdlstm_parallel_hidden_and_memory_state_column_computation.
            parallel_convolution.weight):
        raise RuntimeError("Error: the weight matrices of the parallel hidden and memory state " +
                           "column computation for the multi-directional MDLSTM and the " +
                           "corresponding one-directional MDLSTM are not the same")
    if not TensorUtils.tensors_are_equal(
            multi_directional_mdlstm_bias_for_direction,
            one_directional_mdlstm_parallel_hidden_and_memory_state_column_computation.
            parallel_convolution.bias):
        raise RuntimeError("Error: the bias vectors of the parallel hidden and memory state " +
                           "column computation for the multi-directional MDLSTM and the " +
                           "corresponding one-directional MDLSTM are not the same")
def test_ctc_loss_probabilities_match_labels_three():
    ctc_loss = warpctc_pytorch.CTCLoss()
    print("expected shape of seqLength x batchSize x alphabet_size")
    # Gives no loss
    probs = torch.FloatTensor([[[0, 100, 0, 0, 83],
                                [0, 0, 100, 0, 0],
                                [0, 0, 0, 100, 0]]]).transpose(0, 1).contiguous()
    # # Gives a small loss
    # probs = torch.FloatTensor([[[0, 100, 0, 0, 84],
    #                             [0, 0, 100, 0, 0],
    #                             [0, 0, 0, 100, 0]]]).transpose(0, 1).contiguous()
    print("probs.size(): " + str(probs.size()))
    # No loss
    # labels = Variable(torch.IntTensor([1, 2, 3]))
    # Also no loss (possibly because not possible!):
    # becomes effectively 2-2-2-2, which has length 6!
    # labels = Variable(torch.IntTensor([2, 2, 2, 2]))
    # labels (becomes 2-2) (Why is the loss also zero?)
    labels = Variable(torch.IntTensor([1, 1, 1]))
    # Label sizes should be equal to the number of labels in the example
    label_sizes = Variable(torch.IntTensor([3]))
    # This one must be equal to the number of probabilities to avoid a crash
    probs_sizes = Variable(torch.IntTensor([3]))
    probs = Variable(probs, requires_grad=True)  # tells autograd to compute gradients for probs
    optimizer = optim.SGD(list([probs]), lr=0.001)
    print("probs: " + str(probs))
    cost = ctc_loss(probs, labels, probs_sizes, label_sizes)
    print("cost: " + str(cost))
    expected_cost_tensor = torch.FloatTensor([0])
    print("expected_cost_tensor: " + str(expected_cost_tensor))
    if not TensorUtils.tensors_are_equal(expected_cost_tensor, cost):
        raise RuntimeError("Error: cost expected to be " + str(expected_cost_tensor) +
                           " but was: " + str(float(cost)))
    cost.backward()
    print("cost: " + str(cost))
    print("update probabilities...")
    optimizer.step()
    print("probs: " + str(probs))
def test_simple_normal_and_packed_mdlstm_computation_produce_same_results():
    mdlstm_test = MultiDimensionalLSTMTest.create_multi_dimensional_lstm_test()
    input_tensor = torch.ones(1, 1, 2, 2).cuda()
    input_tensor_list = list([torch.ones(1, 2, 2).cuda()])
    activations, activations_with_examples_packing_list = mdlstm_test.\
        get_mdlstm_activations_with_and_without_packing(input_tensor, input_tensor_list)
    activations_with_examples_packing = activations_with_examples_packing_list[0]
    print("activations_without_examples_packing: " + str(activations))
    print("activations_with_examples_packing: " + str(activations_with_examples_packing))
    if not TensorUtils.tensors_are_equal(activations, activations_with_examples_packing[0]):
        raise RuntimeError("Error: expected the same activations for the MDLSTM forward computation " +
                           "with and without packing, but got different results")
def get_original_sizes_from_tensor_list(tensor_list: list):
    result = list([])
    for x in tensor_list:
        if TensorUtils.number_of_dimensions(x) != 3:
            raise RuntimeError("Error: tensor x with size " + str(x.size()) +
                               " does not have 3 dimensions, as required")
        # print("x.size(): " + str(x.size()))
        original_size = SizeTwoDimensional.create_size_two_dimensional(x.size(1), x.size(2))
        # print("original_size: " + str(original_size))
        result.append(original_size)
    # print("get_original_sizes_from_tensor_list - result: " + str(result))
    return result
def clamp_grad(grad_input, clamping_bound, variable_name: str, gradient_computation_mask=None):
    if gradient_computation_mask is not None:
        # print("Applying gradient computation mask " + str(gradient_computation_mask) +
        #       " to grad_output: " + str(grad_input))
        grad_output = TensorUtils.apply_binary_mask(grad_input, gradient_computation_mask)
    else:
        grad_output = grad_input
    grad_output = grad_output.clamp(min=-clamping_bound, max=clamping_bound)

    # if variable_name == "mdlstm - activation_column" or variable_name == "mdlstm - new_memory_state":
    #     print("clamping gradient - " + variable_name)
    #     print("clamp_grad - grad_input: " + str(grad_input))
    #     print("clamp_grad - grad_output: " + str(grad_output))

    # See: https://stackoverflow.com/questions/900392/getting-the-caller-function-name-inside-another-function-in-python
    is_bad_gradient = False
    if InsideModelGradientClamping.is_bad_grad(grad_input):
        print("is_bad_grad - grad_input: " + str(grad_input))
        print("clamping gradient - " + variable_name)
        print("clamp_grad - grad_input: " + str(grad_input))
        print("clamp_grad - grad_output: " + str(grad_output))
        is_bad_gradient = True
    if InsideModelGradientClamping.is_bad_grad(grad_output):
        print("is_bad_grad - grad_output: " + str(grad_output))
        print("clamping gradient - " + variable_name)
        print("clamp_grad - grad_input: " + str(grad_input))
        print("clamp_grad - grad_output: " + str(grad_output))
        is_bad_gradient = True
    if is_bad_gradient:
        raise RuntimeError("Error: found a bad gradient")
    return grad_output
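# A plausible sketch of what InsideModelGradientClamping.is_bad_grad is assumed to
# check above, namely that the gradient contains NaN or infinite entries; the actual
# implementation may differ. The function name below is made up for this sketch.
def is_bad_grad_sketch(grad: torch.Tensor) -> bool:
    # NaN entries are the only values that are not equal to themselves;
    # infinite entries have infinite absolute value
    contains_nan = bool((grad != grad).any())
    contains_inf = bool((grad.abs() == float("inf")).any())
    return contains_nan or contains_inf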
def test_ctc_loss_probabilities_match_labels():
    ctc_loss = warpctc_pytorch.CTCLoss()
    print("expected shape of seqLength x batchSize x alphabet_size")
    probs = torch.FloatTensor([[[0.9, 1.0, 0.0, 0.0],
                                [0.1, 0.0, 1.0, 1.0]]]).transpose(0, 1).contiguous()
    print("probs.size(): " + str(probs.size()))
    # No cost
    labels = Variable(torch.IntTensor([1, 1, 2, 1]))
    # No cost
    labels = Variable(torch.IntTensor([1, 1, 1, 1]))
    # Cost
    labels = Variable(torch.IntTensor([1, 2, 2, 1]))
    # No cost
    labels = Variable(torch.IntTensor([1, 1]))
    # No cost
    labels = Variable(torch.IntTensor([2, 2]))
    # Crash (apparently there must be at least 2 elements)
    labels = Variable(torch.IntTensor([2]))
    # No cost
    labels = Variable(torch.IntTensor([3, 3]))
    label_sizes = Variable(torch.IntTensor([2]))
    # This one must be equal to the number of probabilities to avoid a crash
    probs_sizes = Variable(torch.IntTensor([2]))
    probs = Variable(probs, requires_grad=True)  # tells autograd to compute gradients for probs
    optimizer = optim.SGD(list([probs]), lr=0.001)
    print("probs: " + str(probs))
    cost = ctc_loss(probs, labels, probs_sizes, label_sizes)
    print("cost: " + str(cost))
    zero_tensor = torch.zeros(1)
    print("zero_tensor: " + str(zero_tensor))
    if not TensorUtils.tensors_are_equal(zero_tensor, cost):
        raise RuntimeError("Error: the loss was expected to be zero, since the probabilities " +
                           "are maximal for the correct labels, but this was not the case")
    cost.backward()
    print("cost: " + str(cost))
    print("update probabilities...")
    optimizer.step()
    print("probs: " + str(probs))
def extract_summed_rows_from_chunk_with_concatenated_rows(chunk_multiple_rows, number_of_rows,
                                                          number_of_columns):
    # print("chunk_multiple_rows.size(): " + str(chunk_multiple_rows.size()))
    # print("number of columns: " + str(number_of_columns) +
    #       " number of rows: " + str(number_of_rows))
    # Notice that the dimension to split on is 1: we have, for example,
    # chunk_multiple_rows.size(): torch.Size([1, 14, 80]).
    # That is, the last dimension goes over the classes, the first one is
    # always 1, and the second dimension goes over the width. Therefore we
    # have to split on dim=1, using number_of_columns as the split size, for
    # the tensor containing the horizontally-concatenated row activations.
    rows = torch.split(chunk_multiple_rows, number_of_columns, dim=1)
    if len(rows) != number_of_rows:
        raise RuntimeError("Error in split: expected " + str(number_of_rows) +
                           " rows but got: " + str(len(rows)))
    summed_rows = TensorUtils.sum_list_of_tensors(rows)
    # print("summed_rows.size(): " + str(summed_rows.size()))
    return summed_rows
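# A toy illustration of the row extraction above (a sketch with made-up sizes,
# matching the torch.Size([1, 14, 80]) example from the comment): a tensor of size
# 1 x (number_of_rows * number_of_columns) x classes is split along dim=1 into
# number_of_rows chunks of number_of_columns columns each, which are then summed.
def demonstrate_row_splitting_and_summing():
    number_of_rows = 2
    number_of_columns = 7
    number_of_classes = 80
    chunk_multiple_rows = torch.ones(1, number_of_rows * number_of_columns, number_of_classes)
    rows = torch.split(chunk_multiple_rows, number_of_columns, dim=1)
    assert len(rows) == number_of_rows
    # Element-wise sum over the row chunks, as TensorUtils.sum_list_of_tensors
    # is assumed to do
    summed_rows = rows[0]
    for row in rows[1:]:
        summed_rows = summed_rows + row
    assert summed_rows.size() == (1, number_of_columns, number_of_classes)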
def clamp_grad_and_print(grad_input, clamping_bound, variable_name: str,
                         gradient_computation_mask=None):
    # print("clamping gradient - " + variable_name)
    # print("number of non-zeros: " + str(TensorUtils.number_of_non_zeros(grad_input)))
    # torch.set_printoptions(precision=10)
    # print("maximum element: " + str(torch.max(grad_input)))
    # print("sum of all elements: " + str(torch.sum(grad_input)))
    # print("tensor norm: " + str(torch.norm(grad_input) * 10000000000))
    # print("clamp_grad_and_print - grad_input: " + str(grad_input))

    # Earlier attempt, kept for reference:
    # nearly_zero_element_mask = grad_input.abs().lt(0.0000000001)
    # print("nearly_zero_element_mask: " + str(nearly_zero_element_mask))
    # grad_output = grad_input
    # zero_element_indices = torch.masked_select(nearly_zero_element_mask)
    # print("zero element indices: " + str(zero_element_indices))
    # grad_output.view(-1)[zero_element_indices] = 0

    # Set nearly-zero elements to exactly zero, see:
    # https://stackoverflow.com/questions/45384684/
    # replace-all-nonzero-values-by-zero-and-all-zero-values-by-a-specific-value/45386834
    grad_output = grad_input.clone()
    grad_output[grad_input.abs() < 0.0000000001] = 0
    # print("grad_output: " + str(grad_output))
    # print("grad_output.size(): " + str(grad_output.size()))
    # print("number of non-zeros after: " + str(TensorUtils.number_of_non_zeros(grad_output)))

    if gradient_computation_mask is not None:
        # print("Applying gradient computation mask " + str(gradient_computation_mask) +
        #       " to grad_output: " + str(grad_output))
        grad_output = TensorUtils.apply_binary_mask(grad_output, gradient_computation_mask)
    grad_output = grad_output.clamp(min=-clamping_bound, max=clamping_bound)
    # print("clamp_grad_and_print - grad_output: " + str(grad_output))
    return grad_output
def compute_convolution_result_and_apply_mask(self, previous_state_column: torch.Tensor,
                                              mask: torch.Tensor):
    # This call seems to be causing a memory leak. The root cause appears to be the
    # call to the 2D convolution with multiple groups. Perhaps there are too many
    # groups, e.g. 28 * 2, but it is not clear how to fix this. The bug occurs in
    # pytorch 0.4.0 and 0.4.1 at least.
    # print("self.number_of_paired_input_weightings_per_group: " +
    #       str(self.number_of_paired_input_weightings_per_group))
    result = self.compute_convolution_result(previous_state_column)

    if self.clamp_gradients:
        # print("ParallelMultipleStateWeightingsComputation - register gradient clamping...")
        # Register clamping of the gradient for the convolution result
        result = InsideModelGradientClamping.\
            register_gradient_clamping_default_clamping_bound(
                result, "parallel_multiple_state_weightings_computation", mask)

    # It is necessary to mask the non-valid entries in the convolution result. If
    # this is not done, the results will be "incorrect", and, when using examples
    # packing, the first row in the packed matrix will be treated differently from
    # the first rows of other examples below vertical row separators. For this
    # reason we must mask not only the states computed for the next iteration
    # during the MDLSTM computation, but also the entries of the convolution
    # computation that are not valid.
    # print("result.size(): " + str(result.size()))
    # print("mask.size(): " + str(mask.size()))
    # print("self.number_of_paired_input_weightings: " +
    #       str(self.get_number_of_paired_input_weightings()))
    if mask is not None:
        result = TensorUtils.apply_binary_mask(result, mask)
    # print("compute_convolution_result_and_apply_mask - result.size(): " + str(result.size()))
    return result
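# A minimal sketch of the kind of grouped convolution the comment above refers to
# (made-up sizes, and using a 1D convolution for simplicity; the actual code may use
# a 2D convolution): with groups=g, each group of input channels is convolved only
# with its own set of filters, which is how the parallel per-direction state
# weightings can be batched into a single convolution call.
def demonstrate_grouped_one_dimensional_convolution():
    import torch.nn as nn
    groups = 4
    channels_per_group = 8
    convolution = nn.Conv1d(in_channels=groups * channels_per_group,
                            out_channels=groups * channels_per_group,
                            kernel_size=2, groups=groups)
    input_column = torch.ones(1, groups * channels_per_group, 10)
    result = convolution(input_column)
    print("result.size(): " + str(result.size()))  # 1 x 32 x 9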
def test_ctc_loss_probabilities_match_labels_second_baidu_example():
    ctc_loss = warpctc_pytorch.CTCLoss()
    print("expected shape of seqLength x batchSize x alphabet_size")
    probs = torch.FloatTensor([[[1, 2, 3, 4, 5],
                                [6, 7, 8, 9, 10],
                                [11, 12, 13, 14, 15]]]).transpose(0, 1).contiguous()
    probs.requires_grad_(True)
    print("probs.size(): " + str(probs.size()))
    labels = Variable(torch.IntTensor([3, 3]))
    # Label sizes should be equal to the number of labels
    label_sizes = Variable(torch.IntTensor([2]))
    # This one must be equal to the number of probabilities to avoid a crash
    probs_sizes = Variable(torch.IntTensor([3]))
    probs = Variable(probs, requires_grad=True)  # tells autograd to compute gradients for probs
    optimizer = optim.SGD(list([probs]), lr=0.001)
    print("probs: " + str(probs))
    cost = ctc_loss(probs, labels, probs_sizes, label_sizes)
    # cost: tensor([ 7.3557]) as in the Baidu tutorial, second example
    print("cost: " + str(cost))
    expected_cost_tensor = torch.FloatTensor([7.355742931365967])
    print("expected_cost_tensor: " + str(expected_cost_tensor))
    if not TensorUtils.tensors_are_equal(expected_cost_tensor, cost):
        raise RuntimeError("Error: cost expected to be " + str(expected_cost_tensor) +
                           " but was: " + str(float(cost)))
    cost.backward()
    print("cost: " + str(cost))
    print("update probabilities...")
    optimizer.step()
    print("probs: " + str(probs))
def test_tensor_flipping_twice_retrieves_original():
    # a = torch.Tensor([range(1, 25)]).view(1, 2, 3, 4)
    a = torch.Tensor([range(1, 10)]).view(3, 3)
    print("a: " + str(a))
    flipping_tuples = list([])
    flipping_tuples.append((True, False))
    flipping_tuples.append((True, True))
    flipping_tuples.append((False, True))
    for flipping_tuple in flipping_tuples:
        print(">>> flip height: " + str(flipping_tuple[0]) +
              ", flip width: " + str(flipping_tuple[1]))
        tensor_flipping = TensorFlipping.create_tensor_flipping(flipping_tuple[0],
                                                                flipping_tuple[1])
        a_flipped = tensor_flipping.flip(a)
        print("a_flipped: " + str(a_flipped))
        a_flipped_back = tensor_flipping.flip(a_flipped)
        print("a_flipped_back: " + str(a_flipped_back))
        # a_flipped_back = torch.zeros(3, 3)
        if not TensorUtils.tensors_are_equal(a, a_flipped_back):
            raise RuntimeError("Error: original tensor:\n " + str(a) +
                               " and flipped, then flipped back tensor:\n " +
                               str(a_flipped_back) + " are not equal")
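# A plausible sketch of what TensorFlipping.flip is assumed to do for the 2D tensor
# in the test above (the real class may handle higher-dimensional tensors and use a
# different mechanism than torch.flip). Flipping twice along the same dimensions is
# the identity, which is exactly the property the test verifies.
def flip_sketch(tensor: torch.Tensor, flip_height: bool, flip_width: bool) -> torch.Tensor:
    dimensions_to_flip = []
    if flip_height:
        dimensions_to_flip.append(0)  # height is assumed to be dimension 0 here
    if flip_width:
        dimensions_to_flip.append(1)  # width is assumed to be dimension 1 here
    if len(dimensions_to_flip) == 0:
        return tensor
    return torch.flip(tensor, dimensions_to_flip)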