def cifar_ten_basic_recognition():
    batch_size = 256
    train_loader = data_preprocessing.load_cifar_ten.get_train_loader(
        batch_size)
    test_loader = data_preprocessing.load_cifar_ten.get_test_loader(batch_size)

    # test_mdrnn_cell()
    #test_mdrnn()
    input_height = 32
    input_width = 32
    input_channels = 3
    hidden_states_size = 32
    # https://stackoverflow.com/questions/45027234/strange-loss-curve-while-training-lstm-with-keras
    # Possibly a batch size of 128 leads to more instability in training?
    #batch_size = 128

    compute_multi_directional = False
    # https://discuss.pytorch.org/t/dropout-changing-between-training-mode-and-eval-mode/6833
    use_dropout = False

    # TODO: Add gradient clipping? This might also make training more stable?
    # Interesting link with tips on how to fix training:
    # https://blog.slavv.com/37-reasons-why-your-neural-network-is-not-working-4020854bd607
    # https://discuss.pytorch.org/t/about-torch-nn-utils-clip-grad-norm/13873
    # https://discuss.pytorch.org/t/proper-way-to-do-gradient-clipping/191

    input_size = SizeTwoDimensional.create_size_two_dimensional(
        input_height, input_width)
    #with torch.autograd.profiler.profile(use_cuda=False) as prof:
    train_mdrnn(train_loader, test_loader, input_channels, input_size,
                hidden_states_size, batch_size, compute_multi_directional,
                use_dropout)
    def forward(self, x):

        # if self.input_and_output_are_lists:
        #     tensor_list_chunking = TensorListChunking.create_tensor_list_chunking(x, self.block_size)
        #     x_chunked = tensor_list_chunking.chunk_tensor_list_into_blocks_concatenate_along_batch_dimension(x, True)
        #     output = self.multi_dimensional_lstm(x_chunked)
        #     output_ordered_back_to_input_format = tensor_list_chunking.\
        #         dechunk_block_tensor_concatenated_along_batch_dimension(output)
        #     # print("output_ordered_back_to_input_format : " + str(output_ordered_back_to_input_format ))
        #     return output_ordered_back_to_input_format
        # else:
        original_size = SizeTwoDimensional.create_size_two_dimensional(
            x.size(2), x.size(3))
        # Tensor chunking is created dynamically, so that every batch may have a different
        # two-dimensional size (within each batch, examples must still be of the same size)
        # print("BlockMultiDimensionalLSTM - self.block_size: " + str(self.block_size))
        tensor_chunking = TensorChunking.create_tensor_chunking(
            original_size, self.block_size)

        x_chunked = tensor_chunking.chunk_tensor_into_blocks_concatenate_along_batch_dimension(
            x)
        output = self.multi_dimensional_lstm(x_chunked)
        output_ordered_back_to_input_format = tensor_chunking.\
            dechunk_block_tensor_concatenated_along_batch_dimension(output)
        # print("output_ordered_back_to_input_format : " + str(output_ordered_back_to_input_format ))
        return output_ordered_back_to_input_format
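# The chunk/de-chunk round trip performed by TensorChunking in the forward
# method above can be illustrated with plain torch ops. This is a minimal
# sketch, not the project's actual implementation: the helper names and the
# row-major block ordering are assumptions.
import torch

def chunk_into_blocks_sketch(x, block_height, block_width):
    # (B, C, H, W) -> (B * (H/bh) * (W/bw), C, bh, bw), blocks taken row-major
    b, c, h, w = x.size()
    blocks = x.unfold(2, block_height, block_height).unfold(3, block_width, block_width)
    # Move the block-grid dimensions next to the batch dimension and flatten
    blocks = blocks.permute(0, 2, 3, 1, 4, 5).contiguous()
    return blocks.view(-1, c, block_height, block_width)

def dechunk_from_blocks_sketch(blocks, b, c, h, w):
    # Inverse of the above: reassemble the block grid into (B, C, H, W)
    bh, bw = blocks.size(2), blocks.size(3)
    grid = blocks.view(b, h // bh, w // bw, c, bh, bw)
    grid = grid.permute(0, 3, 1, 4, 2, 5).contiguous()
    return grid.view(b, c, h, w)

# Round trip reconstructs the original tensor, e.g.:
# x = torch.arange(96.0).view(2, 2, 4, 6)
# assert torch.equal(dechunk_from_blocks_sketch(chunk_into_blocks_sketch(x, 2, 2), 2, 2, 4, 6), x)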
def test_tensor_list_block_chunking_followed_by_dechunking_reconstructs_original_multiple_block_rows(
        tensors_all_have_same_height: bool):
    tensor_one = torch.Tensor([range(1, 33)]).view(2, 4, 4)
    tensor_two = torch.Tensor([range(33, 65)]).view(2, 4, 4)
    block_size = SizeTwoDimensional.create_size_two_dimensional(2, 2)
    test_tensor_list_block_chunking_followed_by_dechunking_reconstructs_original(
        tensor_one, tensor_two, block_size, tensors_all_have_same_height)
def test_tensor_block_chunking_followed_by_dechunking_reconstructs_original():
    tensor = torch.Tensor([range(1, 97)]).view(2, 2, 4, 6)

    if Utils.use_cuda():
        tensor = tensor.cuda()

    print(tensor)
    print("tensor[0, 0, :, :]: " + str(tensor[0, 0, :, :]))
    # chunking = chunk_tensor_into_blocks_return_as_list(
    #     tensor, SizeTwoDimensional.create_size_two_dimensional(2, 2))
    # print("chunking: " + str(chunking))
    # for item in chunking:
    #     print("item.size(): " + str(item.size()))
    original_size = SizeTwoDimensional.create_size_two_dimensional(4, 6)
    block_size = SizeTwoDimensional.create_size_two_dimensional(2, 2)
    tensor_chunking = TensorChunking.create_tensor_chunking(
        original_size, block_size)
    chunking = tensor_chunking.chunk_tensor_into_blocks_concatenate_along_batch_dimension(
        tensor)
    print("chunking: " + str(chunking))
    print("chunking.size(): " + str(chunking.size()))
    dechunked_tensor = tensor_chunking.dechunk_block_tensor_concatenated_along_batch_dimension(
        chunking)

    print("dechunked_tensor: " + str(dechunked_tensor))

    # https://stackoverflow.com/questions/32996281/how-to-check-if-two-torch-tensors-or-matrices-are-equal
    # https://discuss.pytorch.org/t/tensor-math-logical-operations-any-and-all-functions/6624
    tensors_are_equal = torch.eq(tensor, dechunked_tensor).all()
    print("tensors_are_equal: " + str(tensors_are_equal))
    if not tensors_are_equal:
        raise RuntimeError("Error: original tensor " + str(tensor) +
                           " and dechunked tensor " + str(dechunked_tensor) +
                           " are not equal")
    else:
        print(
            "Success: original tensor and dechunked(chunked(tensor)) are equal"
        )
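# Note: for an exact-match comparison, torch.equal(tensor, dechunked_tensor)
# returns a Python bool directly and is a terser alternative to the
# torch.eq(...).all() pattern used above.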
def test_tensor_block_stacking():
    tensor = torch.Tensor([range(1, 17)]).view(4, 4)
    print("original tensor: " + str(tensor))
    result = TensorBlockStacking.rescale_tensor_by_stacking_tensor_blocks(
        tensor, SizeTwoDimensional.create_size_two_dimensional(2, 2), 1)
    print("result: " + str(result))
    expected_result = torch.Tensor([[[1., 3.], [9., 11.]],
                                    [[2., 4.], [10., 12.]],
                                    [[5., 7.], [13., 15.]],
                                    [[6., 8.], [14., 16.]]])
    if not util.tensor_utils.TensorUtils.tensors_are_equal(
            result, expected_result):
        raise RuntimeError("Error: expected the result to be equal to : " +
                           str(expected_result) + " but got: " + str(result))
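# The block stacking tested above appears to match the standard space-to-depth
# rearrangement. A minimal sketch, under the assumption that the trailing
# argument 1 to rescale_tensor_by_stacking_tensor_blocks means no value
# scaling: the same expected_result can be produced with
# torch.nn.functional.pixel_unshuffle (available since PyTorch 1.8).
def tensor_block_stacking_via_pixel_unshuffle_sketch():
    import torch.nn.functional as F
    tensor = torch.Tensor([range(1, 17)]).view(4, 4)
    # pixel_unshuffle expects (*, C, H, W); add batch and channel dimensions
    stacked = F.pixel_unshuffle(tensor.view(1, 1, 4, 4), 2).squeeze(0)
    # stacked has shape (4, 2, 2) and equals the expected_result above
    return stacked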
    def get_original_sizes_from_tensor_list(tensor_list: list):
        result = list([])
        for x in tensor_list:
            if TensorUtils.number_of_dimensions(x) != 3:
                raise RuntimeError("Error: tenor x with size " +
                                   str(x.size()) +
                                   " does not have 3 dimensions, as required")

            # print("x.size(): " + str(x.size()))
            original_size = SizeTwoDimensional.create_size_two_dimensional(
                x.size(1), x.size(2))
            # print("original_size: " + str(original_size))
            result.append(original_size)
        # print(" get_original_sizes_from_tensor_list - result: " + str(result))
        return result
    def compute_activations_with_block_mdlstm(self, x):
        # print(">>>Entered compute_activations_with_block_mdlstm...")
        # print("network_to_softmax_network - network input x sizes: " )
        # for element in x:
        #     print(">>> input list element size - " + str(element.size()))
        network_consumed_block_size = SizeTwoDimensional(self.get_real_network().get_height_reduction_factor(),
                                                         self.get_real_network().get_width_reduction_factor())
        # print("Network_consumed_block_size: " + str(network_consumed_block_size))

        # # Plot two row images for debugging
        # for element in x:
        #     if element.size(1) > 64:
        #         print("image to be plotted size: " + str(element.size()))
        #         element_without_channel_dimension = element.squeeze(0)
        #         util.image_visualization.imshow_tensor_2d(element_without_channel_dimension)

        tensor_list_chunking = TensorListChunking.create_tensor_list_chunking(x, network_consumed_block_size)

        # Chunk the input
        input_chunked = tensor_list_chunking. \
            chunk_tensor_list_into_blocks_concatenate_along_batch_dimension(x, False)

        # print("input_chunked.size(): " + str(input_chunked.size()))

        # Debugging: check that the de-chunked version recovers the original
        ModuleIOStructuring.\
            check_dechunking_chunked_tensor_list_recovers_original(tensor_list_chunking, x, input_chunked)

        # print("input_chunked :" + str(input_chunked))

        # Compute the activations on the chunked input
        activations_chunked = self.network(input_chunked)
        # print("network_to_softmax_network - activations_chunked.size(): " + str(activations_chunked.size()))

        # de-chunk the chunked activations
        activations = NetworkToSoftMaxNetwork.dechunk_activations(activations_chunked, tensor_list_chunking)

        #return NetworkToSoftMaxNetwork.get_activations_single_tensor_and_activation_heights_and_widths(
        #    activations, self.input_network_produces_multiple_output_directions)
        multiple_output_directions = self.input_network_produces_multiple_output_directions or self.use_example_packing
        # print(">>> multiple_output_directions: " + str(multiple_output_directions))
        return NetworkToSoftMaxNetwork.get_activations_single_tensor_and_activation_heights_and_widths(
            activations, multiple_output_directions)
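# Sizing note for compute_activations_with_block_mdlstm above: each chunk fed
# to the network has exactly the network's (height, width) reduction factor as
# its size, so the network presumably reduces every chunk to a single spatial
# activation; this is why the chunked activations are de-chunked with a block
# size of (1, 1). As a hypothetical example, with reduction factors (8, 16), a
# 64 x 512 input image yields (64 / 8) * (512 / 16) = 256 chunks, which
# de-chunk back into an 8 x 32 activation grid for that image.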
def train_mdrnn(train_loader, test_loader, input_channels: int,
                input_size: SizeTwoDimensional, hidden_states_size: int,
                batch_size, compute_multi_directional: bool,
                use_dropout: bool):
    import torch.optim as optim

    criterion = nn.CrossEntropyLoss()
    #multi_dimensional_rnn = MultiDimensionalRNN.create_multi_dimensional_rnn(hidden_states_size,
    #                                                                         batch_size,
    #                                                                         compute_multi_directional,
    #                                                                         nonlinearity="sigmoid")
    #multi_dimensional_rnn = MultiDimensionalRNNFast.create_multi_dimensional_rnn_fast(hidden_states_size,
    #                                                                                  batch_size,
    #                                                                                  compute_multi_directional,
    #                                                                                  use_dropout,
    #                                                                                  nonlinearity="sigmoid")

    #multi_dimensional_rnn = MultiDimensionalLSTM.create_multi_dimensional_lstm(hidden_states_size,
    #                                                                           batch_size,
    #                                                                           compute_multi_directional,
    #                                                                           use_dropout,
    #                                                                           nonlinearity="sigmoid")

    # http://pytorch.org/docs/master/notes/cuda.html
    device = torch.device("cuda:0")
    # device_ids should include device!
    # device_ids lists all the gpus that may be used for parallelization
    # device is the initial device the model will be put on
    #device_ids = [0, 1]
    device_ids = [0]

    # multi_dimensional_rnn = MultiDimensionalLSTM.create_multi_dimensional_lstm_fast(input_channels,
    #                                                                                 hidden_states_size,
    #                                                                                 compute_multi_directional,
    #                                                                                 use_dropout,
    #                                                                                 nonlinearity="sigmoid")

    mdlstm_block_size = SizeTwoDimensional.create_size_two_dimensional(4, 4)
    # multi_dimensional_rnn = BlockMultiDimensionalLSTM.create_block_multi_dimensional_lstm(input_channels,
    #                                                                                       hidden_states_size,
    #                                                                                       mdlstm_block_size,
    #                                                                                       compute_multi_directional,
    #                                                                                       use_dropout,
    #                                                                                       nonlinearity="sigmoid")
    #
    # block_strided_convolution_block_size = SizeTwoDimensional.create_size_two_dimensional(4, 4)
    # output_channels = mdlstm_block_size.width * mdlstm_block_size.height * hidden_states_size
    # multi_dimensional_rnn = BlockMultiDimensionalLSTMLayerPair.\
    #     create_block_multi_dimensional_lstm_layer_pair(input_channels, hidden_states_size,
    #                                                    output_channels, mdlstm_block_size,
    #                                                    block_strided_convolution_block_size,
    #                                                    compute_multi_directional,
    #                                                    use_dropout,
    #                                                    nonlinearity="tanh")

    # # An intermediate test case with first a layer-pair that consists of a
    # # BlockMultiDimensionalLSTM layer, followed by a BlockStructuredConvolution layer.
    # # After this comes an additional single block_strided_convolution layer as
    # # opposed to another full layer pair
    # mdlstm_block_size = SizeTwoDimensional.create_size_two_dimensional(4, 4)
    # block_strided_convolution_block_size = SizeTwoDimensional.create_size_two_dimensional(4, 4)
    # multi_dimensional_rnn = BlockMultiDimensionalLSTMLayerPairStacking.\
    #     create_one_layer_pair_plus_second_block_convolution_layer_network(hidden_states_size, mdlstm_block_size,
    #                                                                       block_strided_convolution_block_size)

    # # An intermediate test case with first a layer-pair that consists of a
    # # BlockMultiDimensionalLSTM layer, followed by a BlockStructuredConvolution layer.
    # # After this comes an additional single mdlstm layer as
    # # opposed to another full layer pair
    # mdlstm_block_size = SizeTwoDimensional.create_size_two_dimensional(4, 4)
    # block_strided_convolution_block_size = SizeTwoDimensional.create_size_two_dimensional(4, 4)
    # multi_dimensional_rnn = BlockMultiDimensionalLSTMLayerPairStacking.\
    #     create_one_layer_pair_plus_second_block_mdlstm_layer_network(hidden_states_size, mdlstm_block_size,
    #                                                                       block_strided_convolution_block_size)
    #
    mdlstm_block_size = SizeTwoDimensional.create_size_two_dimensional(4, 2)
    block_strided_convolution_block_size = SizeTwoDimensional.create_size_two_dimensional(
        4, 2)
    multi_dimensional_rnn = MultiDimensionalLSTMLayerPairStacking.\
        create_two_layer_pair_network(hidden_states_size, mdlstm_block_size,
                                      block_strided_convolution_block_size, False)

    network = MultiDimensionalRNNToSingleClassNetwork.\
        create_multi_dimensional_rnn_to_single_class_network(multi_dimensional_rnn, input_size)

    #multi_dimensional_rnn = Net()

    if Utils.use_cuda():
        #multi_dimensional_rnn = multi_dimensional_rnn.cuda()

        network = nn.DataParallel(network, device_ids=device_ids)

        network.to(device)
        #print("multi_dimensional_rnn.module.mdlstm_direction_one_parameters.parallel_memory_state_column_computation :"
        #      + str(multi_dimensional_rnn.module.mdlstm_direction_one_parameters.parallel_memory_state_column_computation))

        #print("multi_dimensional_rnn.module.mdlstm_direction_one_parameters."
        #      "parallel_memory_state_column_computation.parallel_convolution.bias :"
        #      + str(multi_dimensional_rnn.module.mdlstm_direction_one_parameters.
        #            parallel_memory_state_column_computation.parallel_convolution.bias))

        #print("multi_dimensional_rnn.module.mdlstm_direction_one_parameters."
        #      "parallel_hidden_state_column_computation.parallel_convolution.bias :"
        #      + str(multi_dimensional_rnn.module.mdlstm_direction_one_parameters.
        #            parallel_hidden_state_column_computation.parallel_convolution.bias))

    print_number_of_parameters(multi_dimensional_rnn)

    #optimizer = optim.SGD(multi_dimensional_rnn.parameters(), lr=0.001, momentum=0.9)

    # Adding some weight decay seems to do magic, see: http://pytorch.org/docs/master/optim.html
    optimizer = optim.SGD(network.parameters(),
                          lr=0.001,
                          momentum=0.9,
                          weight_decay=1e-5)

    # Faster learning
    #optimizer = optim.SGD(multi_dimensional_rnn.parameters(), lr=0.01, momentum=0.9)

    start = time.time()

    num_gradient_corrections = 0

    for epoch in range(4):  # loop over the dataset multiple times

        running_loss = 0.0
        for i, data in enumerate(train_loader, 0):

            # get the inputs
            inputs, labels = data

            if Utils.use_cuda():
                inputs = inputs.to(device)
                # Set requires_grad_(True) directly and only for the input
                inputs.requires_grad_(True)

            # wrap them in Variable
            # labels = Variable(labels)  # Labels need no gradient apparently
            if Utils.use_cuda():
                labels = labels.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            #print("inputs: " + str(inputs))

            # forward + backward + optimize
            #outputs = multi_dimensional_rnn(Variable(inputs))  # For "Net" (Le Net)
            time_start_network_forward = time.time()
            outputs = network(inputs)
            # print("Time used for network forward: " + str(util.timing.time_since(time_start_network_forward)))
            # print("outputs: " + str(outputs))
            # print("outputs.size(): " + str(outputs.size()))
            #print("labels: " + str(labels))

            time_start_loss_computation = time.time()
            loss = criterion(outputs, labels)
            # print("Time used for loss computation: " + str(util.timing.time_since(time_start_loss_computation)))

            time_start_loss_backward = time.time()

            # Debugging instrumentation for locating bad (NaN/inf) gradients;
            # left disabled (commented out) so that training continues past
            # the first batch instead of stopping at the RuntimeError below.
            # get_dot = modules.find_bad_gradients.register_hooks(outputs)
            loss.backward()
            # dot = get_dot()
            # dot.save('mdlstm_find_bad_gradients.dot')
            # render('dot', 'png', 'mdlstm_find_bad_gradients.dot')
            # raise RuntimeError("stopping after find bad gradients")

            # print("Time used for loss backward: " + str(util.timing.time_since(time_start_loss_backward)))

            # Perform gradient clipping (a sketch of a possible implementation
            # follows after this function)
            made_gradient_norm_based_correction = clip_gradient(
                multi_dimensional_rnn)
            if made_gradient_norm_based_correction:
                num_gradient_corrections += 1

            optimizer.step()

            # print statistics
            # print("loss.data: " + str(loss.data))
            # print("loss.data[0]: " + str(loss.data[0]))
            running_loss += loss.item()  # .item() yields a Python float, avoiding tensor accumulation
            #if i % 2000 == 1999:  # print every 2000 mini-batches
            # See: https://stackoverflow.com/questions/5598181/python-multiple-prints-on-the-same-line
            #print(str(i)+",", end="", flush=True)
            if i % 100 == 99:  # print every 100 mini-batches
                end = time.time()
                running_time = end - start
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / 100) +
                      " Running time: " + str(running_time))
                print("Number of gradient norm-based corrections: " +
                      str(num_gradient_corrections))
                running_loss = 0.0
                num_gradient_corrections = 0

    print('Finished Training')

    # Run evaluation
    # multi_dimensional_rnn.set_training(False) # Normal case
    network.module.set_training(False)  # When using DataParallel
    evaluate_mdrnn(test_loader, network, batch_size, device)
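# clip_gradient, called in the training loop above, is defined elsewhere in
# this project. A minimal sketch of what a norm-based implementation could
# look like, using torch.nn.utils.clip_grad_norm_; the threshold value is a
# hypothetical choice, not taken from the project.
import torch

MAX_GRADIENT_NORM_SKETCH = 10.0

def clip_gradient_sketch(model):
    # clip_grad_norm_ rescales all gradients in place when their combined norm
    # exceeds the threshold, and returns the norm measured before clipping
    total_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), MAX_GRADIENT_NORM_SKETCH)
    # Report whether a correction was made, matching how the training loop
    # above counts num_gradient_corrections
    return float(total_norm) > MAX_GRADIENT_NORM_SKETCH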
    @staticmethod
    def dechunk_activations(activations_chunked, tensor_list_chunking):
        return tensor_list_chunking. \
            dechunk_block_tensor_concatenated_along_batch_dimension_changed_block_size(activations_chunked,
                                                                                       SizeTwoDimensional(1, 1))
    def compute_forward_from_chunked_input_using_portions(self, x_chunked, tensor_list_chunking):

        # Sum the results for multiple directions contained in chunks of the result
        if self.compute_multi_directional:

            # print("compute_forward_from_chunked_input_using_portions - x_chunked.size(): " +
            #       str(x_chunked.size()))

            cat_list = list([])
            data_portions = torch.chunk(x_chunked, 4, 0)
            for data_portion in data_portions:
                data_portion_conv_result = self.compute_forward_one_directional(data_portion)
                data_portion_results_per_direction = torch.chunk(data_portion_conv_result, 4, 1)
                data_portion_result = torch.sum(torch.stack(data_portion_results_per_direction, 0), 0)
                cat_list.append(data_portion_result)
            result = torch.cat(cat_list, 0)
        else:
            result = self.compute_forward_one_directional(x_chunked)

        if self.use_example_packing:
            # print("block_strided_convolution - use example packing")
            result = tensor_list_chunking. \
                dechunk_block_tensor_concatenated_along_batch_dimension_changed_block_size(result,
                                                                                           SizeTwoDimensional(1, 1))
        return result
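    # The per-direction summation above can be illustrated on a toy tensor
    # (illustrative values only): a convolution result with 4 * C channels,
    # one group of C channels per scanning direction, is split on the channel
    # dimension and the four groups are summed element-wise:
    #
    #   result = torch.arange(24.0).view(1, 8, 1, 3)          # 4 directions x 2 channels
    #   per_direction = torch.chunk(result, 4, dim=1)         # four (1, 2, 1, 3) tensors
    #   summed = torch.sum(torch.stack(per_direction, 0), 0)  # (1, 2, 1, 3)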
    def compute_forward_from_chunked_input(self, x_chunked, tensor_list_chunking):

        result = self.compute_forward_one_directional(x_chunked)

        # If the input and output are lists, the output of the convolution
        # and activation function must again be converted back to the original list
        # format
        # if self.input_and_output_are_list:
        #     convolution_output_size = SizeTwoDimensional.create_size_two_dimensional(1, 1)
        #     output_ordered_back_to_input_format = tensor_list_chunking. \
        #         dechunk_block_tensor_concatenated_along_batch_dimension_changed_block_size(result,
        #                                                                                    convolution_output_size)
        #     return output_ordered_back_to_input_format

        # print("block_strided_convolution - result.size(): " + str(result.size()))

        # Sum the results for multiple directions contained in chunks of the result
        # If the weights are shared across directions, this summation has already been done over the inputs
        # before computing the convolution
        if self.compute_multi_directional and not self.share_weights_across_directions:
            result = BlockStridedConvolution.chunk_four_parts_on_channel_dimension_and_sum(result)
            # result = TensorUtils.sum_list_of_tensors(results_per_direction)

        if self.use_example_packing:
            # print("block_strided_convolution - use example packing")
            result = tensor_list_chunking. \
                dechunk_block_tensor_concatenated_along_batch_dimension_changed_block_size(result,
                                                                                           SizeTwoDimensional(1, 1))
        return result
    def get_output_size_two_dimensional(self, input_size: SizeTwoDimensional):
        block_size = self.block_size
        height = input_size.height // block_size.height
        width = input_size.width // block_size.width
        return SizeTwoDimensional.create_size_two_dimensional(height, width)
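    # Usage sketch for get_output_size_two_dimensional (hypothetical numbers):
    # with a block size of 4 x 2, a 32 x 32 input yields an output grid of
    # height 32 // 4 = 8 and width 32 // 2 = 16; integer division means any
    # remainder rows or columns are dropped.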