Example #1
def draw_results(img_name, element_positions):
    img_location = os.path.join(str(Path(__file__).parent.parent.parent),
                                'resources', 'input_images')
    img_location = os.path.join(img_location, img_name)

    img = cv2.imread(img_location, cv2.IMREAD_UNCHANGED)  # Read the image.
    result = img
    if len(img.shape) == 3 and img.shape[2] == 4:  # Only images with an alpha channel need the transparency fix.
        trans_mask = img[:, :, 3] == 0  # Remove any transparency.
        img[trans_mask] = [255, 255, 255, 255]
        img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # Convert to grayscale.
        th, img_gray = cv2.threshold(img_gray, 127, 255,
                                     cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)
        result = img_gray

    img_gray = result

    color = (255, 0, 0)
    thickness = 2
    for el in element_positions:  # el = ((Y_UP, Y_DOWN), (X_LEFT, X_RIGHT))
        start_point = (el[1][0], el[0][0])  # (X_LEFT, Y_UP)
        end_point = (el[1][1], el[0][1])  # (X_RIGHT, Y_DOWN)
        img_gray = cv2.rectangle(img_gray, start_point, end_point, color,
                                 thickness)

    cv2.imshow("elements", ResizeWithAspectRatio(img_gray, height=900))
    cv2.moveWindow('elements', 200, 200)
    cv2.waitKey()

    construct_output(indent_level="block",
                     message="Input image processing done.")
Example #2
def conv_network_analysis(input_image_name):
    """
    Main function for convolutional network analysis.
    Calls on the network for value recognizing.
    Calls on the network for duration analyzing.
    :param input_image_name: Name of the image that is being analyzed.
    """
    construct_output(indent_level="block",
                     message="Analyzing the elements of the image ({}) with a convolutional network."
                     .format(input_image_name))

    gpus = tf.config.experimental.list_physical_devices('GPU')
    if gpus:  # Guard against machines with no visible GPU.
        tf.config.experimental.set_memory_growth(gpus[0], True)

    # Import the dataset, split it into training and testing, and retrain the networks.
    # UNCOMMENT FOR RETRAINING:
    # (test_arr, test_label), (train_arr, train_label) = prepare_new_data(test_data_percentage=0)
    # value_processing_conv_net.train_note_values_conv_net(test_arr, test_label, train_arr, train_label)
    # duration_processing_conv_net.train_note_duration_conv_net(test_arr, test_label, train_arr, train_label)

    # Load the trained value-recognition network from the disk and analyze the image.
    value_names = value_processing_conv_net.analyze_using_saved_data(input_image_name)
    # Load the trained duration-analysis network from the disk and analyze the image.
    durations = duration_processing_conv_net.analyze_using_saved_data(input_image_name)

    construct_output(indent_level="block",
                     message="Done analyzing the elements of the image ({}) with a convolutional network."
                     .format(input_image_name))

    return value_names, durations
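All of these examples log through a construct_output helper that is not included here. A plausible sketch, assuming indent_level is either the string "block" (a section delimiter) or a numeric indentation depth; the project's actual formatting may differ:

def construct_output(indent_level, message):
    # Hypothetical logging helper: "block" marks a section boundary,
    # integers indent the message by that many levels.
    if indent_level == "block":
        print("=" * 60)
        print(message)
        print("=" * 60)
    else:
        print("    " * int(indent_level) + message)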
Example #3
def generator_main():
    """
    Main function for image processing.
    Calls on module for row splitting (first), and then module for individual elements extraction(second).
    Results are then used for generating the dataset.
    """

    # Get the path to the input images.
    input_images_path = os.path.join(str(Path(__file__).parent.parent.parent),
                                     'resources', 'input_images')
    output_path = os.path.join(str(Path(__file__).parent.parent),
                               'positions_detection', 'resources', 'train')
    output_path_check = [
        f for f in listdir(output_path)
        if not isfile(join(output_path, f))  # Directories in 'train' mark already processed images.
    ]

    # Get all the images in said folder.
    input_images = [
        f for f in listdir(input_images_path)
        if isfile(join(input_images_path, f))
    ]
    # Iterate through those images.
    for input_image in input_images:
        if input_image[:-4] in output_path_check:  # Skip already existing images.
            continue

        # Construct the path to the individual image.
        input_image_path = os.path.join(input_images_path, input_image)

        construct_output(
            indent_level="block",
            message="Processing the resources image ({}).".format(input_image))

        row_positions = split_into_rows(input_image_path)  # Firstly, extract rows.
        # Then, extract elements from those rows.
        x_coords_by_row_number = extract_elements_by_template_matching(
            input_image)

        # element_positions entries: ((Y_UP, Y_DOWN), (X_LEFT, X_RIGHT)).
        element_positions = list()
        for c in x_coords_by_row_number:  # c = (row_number, (X_LEFT, X_RIGHT))
            element_positions.append((row_positions[c[0]], c[1]))

        # draw_results(img_name=input_image, element_positions=element_positions)

        # Use the gathered element positions to generate masks for training.
        generate_train_element(input_image_path, element_positions)
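To make the coordinate bookkeeping above concrete, here is a tiny illustration with made-up numbers (all values are hypothetical):

row_positions = [(120, 260), (300, 440)]  # (Y_UP, Y_DOWN) for each row.
x_coords_by_row_number = [(0, (35, 95)), (1, (40, 110))]  # (row_number, (X_LEFT, X_RIGHT)).
element_positions = [(row_positions[row], xs) for row, xs in x_coords_by_row_number]
assert element_positions[0] == ((120, 260), (35, 95))  # ((Y_UP, Y_DOWN), (X_LEFT, X_RIGHT)).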
Example #4
def train_note_values_conv_net(test_data_arr, test_data_label, train_data_arr,
                               train_data_label):
    """
    This function trains the convolutional network for recognizing note values based on resources data.
    Tutorial for this code found here:
    https://colab.research.google.com/github/tensorflow/docs/blob/master/site/en/tutorials/keras/classification.ipynb
    The results are saved on a disk so that they can be used without retraining the network.
    :param train_data_label: Labels with names and durations for the train data images.
    :param train_data_arr: Array containing the train images.
    :param test_data_label: Labels with names and durations for the test data images.
    :param test_data_arr: Array containing the test images.
    """

    gpus = tf.config.experimental.list_physical_devices('GPU')
    if gpus:  # Guard against machines with no visible GPU.
        tf.config.experimental.set_memory_growth(gpus[0], True)
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # Suppress TensorFlow INFO and WARNING log output.

    construct_output(
        indent_level=0,
        message="Convolutional Network 1 (Note value determining).")

    # Scale these values to a range of 0 to 1 before feeding them to the convolutional network model
    print("Scaling test values to [0-1] range.")
    test_data_arr = test_data_arr / 255.0
    print("Scaling train values to [0-1] range (this will take a while).")
    train_data_arr = train_data_arr / 255.0

    # Construct the path for saving the results of training.
    saved_model_values_path = os.path.abspath(
        os.path.join(str(Path(__file__).parent.parent.parent), 'resources'))
    saved_model_values_path = os.path.join(saved_model_values_path,
                                           'saved_models')
    saved_model_name = "value_processing_net_saved.ckpt"
    saved_model_values_path = os.path.join(saved_model_values_path,
                                           saved_model_name)
    values_model_cb = tf.keras.callbacks.ModelCheckpoint(
        filepath=saved_model_values_path, save_weights_only=True, verbose=1)

    # First network only recognizes the values. No need to feed it unrecognized elements (elements with no value).
    value_network_train_data_arr = np.array([
        x for i, x in enumerate(train_data_arr)
        if train_data_label[i][0][0] != "Uncategorized"
    ])
    value_network_train_data_label = np.array([(x[0][0], x[1])
                                               for x in train_data_label
                                               if x[0][0] != "Uncategorized"])

    value_network_test_data_arr = np.array([
        x for i, x in enumerate(test_data_arr)
        if test_data_label[i][0][0] != "Uncategorized"
    ])
    value_network_test_data_label = np.array([(x[0][0], x[1])
                                              for x in test_data_label
                                              if x[0][0] != "Uncategorized"])

    # class_names contains the possible results (21 note values, A3 through G5).
    class_names = [
        "A3", "A4", "A5",
        "B3", "B4", "B5",
        "C3", "C4", "C5",
        "D3", "D4", "D5",
        "E3", "E4", "E5",
        "F3", "F4", "F5",
        "G3", "G4", "G5",
    ]

    # Fetch only the labels (note values) from the data.
    value_network_train_data_label = [
        item[0] for item in value_network_train_data_label
    ]

    # Assign the corresponding numerical values to labels.
    value_network_train_data_label_values_numerical = values_to_numerical(
        value_network_train_data_label, class_names)

    # Pin the work to the second visible GPU ('/GPU:1'). Note that TensorFlow
    # only enumerates CUDA-capable devices, so the index depends on the machine.
    with tf.device('/GPU:1'):
        construct_output(indent_level=0, message="Start training.")
        # Set up the layers.
        # The first layer in this network, tf.keras.layers.Flatten, transforms the format of the images
        # from a 2D array (200x200 px) to a 1D array (of 200x200 = 40000 pixels).
        # After the pixels are flattened, the network consists of a sequence of two tf.keras.layers.Dense layers.
        # These are densely connected, or fully connected, neural layers.
        # The first Dense layer has 128 nodes (or neurons).
        # The second (and last) layer returns an array of 22 scores, one per class.
        # (class_names above lists 21 values, so one output slot goes unused.)
        model = tf.keras.Sequential([
            tf.keras.layers.Flatten(input_shape=(200, 200)),
            tf.keras.layers.Dense(128, activation='relu'),
            tf.keras.layers.Dense(22)
        ])

        # Before the model is ready for training, it needs a few more settings.
        # These are added during the model's compile step:
        # Loss function - measures how accurate the model is during training.
        # You want to minimize this function to "steer" the model in the right direction.
        # Optimizer - determines how the model is updated based on the data it sees and its loss function.
        # Metrics - used to monitor the training and testing steps.
        # The following example uses accuracy, the fraction of the images that are correctly classified.
        model.compile(optimizer='adam',
                      loss=tf.keras.losses.SparseCategoricalCrossentropy(
                          from_logits=True),
                      metrics=['accuracy'])

        # Training the convolutional network model requires the following steps:
        # 1. Feed the training data (here, value_network_train_data_arr and its numerical labels)
        #    to the model; the model learns to associate images and labels.
        # 2. Ask the model to make predictions about a test set (here, value_network_test_data_arr).
        # 3. Verify that the predictions match the labels of the test set.
        model.fit(value_network_train_data_arr,
                  value_network_train_data_label_values_numerical,
                  epochs=3,
                  callbacks=[values_model_cb])
        construct_output(
            indent_level=0,
            message="Save the network weights to avoid retraining on every run."
        )

        # Attach a softmax layer to convert the logits to probabilities, which are easier to interpret.
        probability_model = tf.keras.Sequential(
            [model, tf.keras.layers.Softmax()])

        # TESTING THE NETWORK. =======================================================================================
        # Compare how the model performs on the test dataset.
        # value_network_test_data_label = [item[0] for item in value_network_test_data_label]
        # value_network_test_data_label_values_numerical = values_to_numerical(
        #     value_network_test_data_label,
        #     class_names)
        # test_loss, test_acc = model.evaluate(value_network_test_data_arr,
        #                                      value_network_test_data_label_values_numerical,
        #                                      verbose=2
        #                                      )
        # print('\nTest accuracy:', test_acc)
        # predictions = probability_model.predict(value_network_test_data_arr)
        # print(predictions[0])
        # print("max= ", np.argmax(predictions[0]))
        # import cv2
        # cv2.imshow("img", value_network_test_data_arr[0])
        # cv2.waitKey()

        construct_output(indent_level=0, message="End training.")
        construct_output(
            indent_level=0,
            message="Convolutional Network 1 (Note value determining) Done.")
Example #5
def extract_elements_by_template_matching(img_name):
    """
    Main function for element extraction that calls on all the sub-functions.
    :param img_name: Name of the resources image from which image rows where extracted.
    """
    img_location = os.path.join(str(Path(__file__).parent.parent.parent),
                                'resources', 'input_images')
    img_location = os.path.join(img_location, 'input_images_rows',
                                img_name[:-4])
    rows_numerated = [
        row for row in os.listdir(img_location) if row.startswith("row")
    ]

    construct_output(indent_level=0,
                     message="Finding individual elements in the saved rows.")
    construct_output(indent_level=1, message="Reading extracted rows.")

    # element_positions entries: (ROW_NUMBER, (X_LEFT, X_RIGHT)).
    element_positions = []
    for row_number, row_img in enumerate(rows_numerated):
        construct_output(indent_level=2,
                         message="Reading row number {}.".format(row_number))
        img_rgb = cv2.imread(os.path.join(img_location, row_img))  # Read the row image.
        img_gray = cv2.cvtColor(img_rgb, cv2.COLOR_BGR2GRAY)  # Convert it into a gray image.
        image_w, image_h = img_gray.shape[::-1]  # Image dimensions.

        # Construct the path to the templates.
        template_path = os.path.abspath(
            os.path.join(str(Path(__file__).parent.parent.parent), 'resources',
                         'templates'))
        # List all the templates that are not within the 'line_start_templates' subdirectory.
        template_names = [
            t for t in os.listdir(template_path)
            if not str(t).startswith("line_start_templates")
        ]

        # (1) Find templates using template matching.
        # Use the 'match_templates' function to get the locations(list),dimensions(list).
        # Also, get list of booleans on whether the templates are recognized by their names (such as clef_g),
        # or if they need to be processed by a conv. network.
        # Replace the values in 'template_names' with names of the found templates
        t_loc, t_dim, t_recognized_list, found_t_names = match_templates(
            template_names, template_path, img_gray)

        # (2) Get the start and end coordinates of the templates.
        construct_output(
            indent_level=2,
            message="Matching the row elements with the templates.")
        templates_start_end = [(x[0], x[0] + t_dim[index][0])
                               for index, x in enumerate(t_loc)]

        # (3) Save the images in the standard size (200x200 px). Return value only used for visualisation.
        construct_output(
            indent_level=2,
            message="Saving found elements in the row {}.".format(row_number))
        x_coords = find_x_coords(templates_start_end)
        for x_coord in x_coords:
            element_positions.append((row_number, x_coord))

        # RESULT DRAWING (TESTING).
        # from copy import deepcopy
        # from note_recognition_app.image_segmentation_dataset_generator.img_resizer import ResizeWithAspectRatio
        # from note_recognition_app.image_segmentation_dataset_generator.row_splitter_result_visualizer import generate_result_img
        # Draw the results of (2). Leave for checking purposes.
        # tmp_img = deepcopy(img_rgb)
        # for el in templates_start_end:
        #     cv2.rectangle(tmp_img, (el[0], 20), (el[1], image_h - 20), (255, 0, 255), 2)
        # cv2.imshow('Found elements.', ResizeWithAspectRatio(tmp_img, width=1000))
        # cv2.moveWindow('Found elements.', 0, 200)
        # Draw the results of (3). Leave for checking purposes.
        # result_img = generate_result_img(input_images_individual_elements)
        # cv2.imshow('Final Result.', ResizeWithAspectRatio(result_img, width=500))
        # cv2.moveWindow('Final Result.', 0, 400)
        # cv2.waitKey()

    construct_output(indent_level=0,
                     message="Finding individual elements in the saved rows done.")
    return element_positions
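match_templates and find_x_coords are project helpers that are not shown here. A hypothetical sketch of find_x_coords, assuming it merges overlapping template spans into disjoint (X_LEFT, X_RIGHT) ranges (the gap parameter is an assumption):

def find_x_coords(templates_start_end, gap=5):
    # Merge overlapping or near-adjacent (start, end) spans into
    # disjoint (X_LEFT, X_RIGHT) element coordinates.
    merged = []
    for start, end in sorted(templates_start_end):
        if merged and start <= merged[-1][1] + gap:
            merged[-1] = (merged[-1][0], max(merged[-1][1], end))
        else:
            merged.append((start, end))
    return merged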
Example #6
def split_into_rows(img_path):
    """
    This function splits the resources image into separate rows of note lines.
    :param img_path: Path to the image.
    :return: boolean: True if successful, false otherwise.
    """
    try:
        construct_output(indent_level=0, message="Row splitting.")
        img_name = os.path.basename(img_path)  # Extract the image name from the given path.
        # Directory name for the directory that will hold the rows of the resources image.
        dir_name = os.path.join(str(Path(__file__).parent.parent.parent), 'resources', 'input_images')
        dir_name = os.path.join(dir_name, 'input_images_rows', img_name[:-4])
        try:  # Try creating a directory.
            construct_output(indent_level=1, message="Creating a folder for the image: {}".format(dir_name))
            os.mkdir(dir_name)
        except OSError:
            construct_output(indent_level=1, message="Folder already exists: {}".format(dir_name))

        construct_output(indent_level=1, message="Reading the resources image {}.".format(img_name))
        img = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)  # Read the image.
        if len(img.shape) == 3 and img.shape[2] == 4:  # Only RGBA images need the transparency fix.
            trans_mask = img[:, :, 3] == 0  # Remove any transparency.
            img[trans_mask] = [255, 255, 255, 255]
        img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # Convert to grayscale.
        # Make a black and white image based on a threshold.
        th, img_gray = cv2.threshold(img_gray, 127, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)
        image_w, image_h = img_gray.shape[::-1]  # Image dimensions.

        template_path = os.path.abspath(
            os.path.join(str(Path(__file__).parent.parent.parent), 'resources', 'templates', 'line_start_templates'))
        row_templates = [file for file in os.listdir(template_path)]

        construct_output(indent_level=1, message="Finding rows in the image.")
        t_locations = []  # List that will contain all the locations that were found by template matching.
        t_dimensions = []  # List that will contain all the dimensions of the found templates on the locations.
        for t in row_templates:  # Iterate through all of the vertical line templates.
            template = cv2.imread(os.path.join(template_path, t), 0)  # Read the template as a gray image.
            res = cv2.matchTemplate(img_gray, template, cv2.TM_CCOEFF_NORMED)  # Match the template against the image.
            threshold = 0.80  # The threshold to determine whether a part of an image is similar enough to the template.
            locations = np.where(res >= threshold)  # Locations in the image where the template matching found results.
            template_w, template_h = template.shape[::-1]  # Dimensions of the current template.
            # list(zip(*locations[::-1])) -> zip the 'y' and 'x' arrays into (x, y) tuples.
            # Iterate through locations to remove elements already found by previous templates.
            for point in list(zip(*locations[::-1])):
                if len(t_locations) == 0:  # Save the first template matching results without checking.
                    t_locations.append(point)
                    t_dimensions.append((template_w, template_h))  # Also save the template dimensions.
                else:  # If 't_locations' already contains the new point +/- 6 px, don't add it.
                    found_ys = list(zip(*t_locations))[1]
                    if np.intersect1d(found_ys, list(range(point[1] - 6, point[1] + 6))).size == 0:
                        t_locations.append(point)
                        t_dimensions.append((template_w, template_h))

        construct_output(indent_level=1, message="Saving the found rows into folder: {}".format(dir_name))

        row_positions = list()
        for index, el in enumerate(t_locations):  # Iterate through found locations.
            row_position = tuple((el[1] - 40, el[1] + t_dimensions[index][1] + 40))
            row_positions.append(row_position)

            img_slice_name_and_path = os.path.join(dir_name, "row" + str(index) + ".png")  # Generate a path and a name.
            img_slice = img_gray[el[1] - 40:el[1] + t_dimensions[index][1] + 40, 0:image_w]  # Cut out the row.
            cv2.imwrite(img_slice_name_and_path, img_slice)  # Save that part of the image.

        construct_output(indent_level=0, message="Row splitting done.")
        return row_positions

    except Exception as e:  # Catch any exception.
        print(e)
        exit(-1)  # Abort on any failure.
Example #7
def generate_train_element(input_image_path, element_positions):
    """
    For input image and element positions on the input image, generate masks using those positions.
    # Standard image size is 2048 x 2048 px when saving.
    """
    STANDARD_IMG_DIM = 4096

    construct_output(
        indent_level=0,
        message="Generating the dataset for finding element positions.")
    # Get the image name (without the extension) from the input path.
    img_name = os.path.basename(input_image_path)[:-4]
    construct_output(indent_level=1,
                     message="Adding {} to the dataset".format(img_name))
    # Generate the path to the training directory.
    train_dataset_input = os.path.join(str(Path(__file__).parent.parent),
                                       'positions_detection', 'resources',
                                       'train', img_name)
    # Generate the path to the original image directory.
    images_folder = os.path.join(train_dataset_input, 'images')
    # Generate the path to the masks directory.
    masks_folder = os.path.join(train_dataset_input, 'masks')

    construct_output(indent_level=2,
                     message="Creating the needed directories.")
    try:  # Generate needed directory structure.
        os.mkdir(train_dataset_input)
        os.mkdir(images_folder)
        os.mkdir(masks_folder)
    except FileExistsError as _:
        pass

    construct_output(indent_level=2,
                     message="Saving a non-transparent image into {}.".format(
                         images_folder))
    # Get the original image.
    org_img = cv2.imread(input_image_path, cv2.IMREAD_UNCHANGED)
    org_img = ResizeWithAspectRatio(org_img, width=2600)  # Resize to standard dimensions.
    img_height, img_width = org_img.shape[:2]  # Get the dimensions of the original image.

    img_gray = deepcopy(org_img)
    if len(org_img.shape) == 3 and org_img.shape[2] == 4:  # If the image has an alpha channel.
        trans_mask = org_img[:, :, 3] == 0  # Remove any transparency.
        org_img[trans_mask] = [255, 255, 255, 255]
        img_gray = cv2.cvtColor(org_img, cv2.COLOR_BGR2GRAY)  # Convert to grayscale.
        th, img_gray = cv2.threshold(img_gray, 127, 255,
                                     cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)

    if img_height < STANDARD_IMG_DIM:  # Resize height to standard dimensions by adding padding on the bottom.
        additional_lines_h = STANDARD_IMG_DIM - img_height  # Calculate the needed padding.
        additional_lines = np.zeros((additional_lines_h, img_width),
                                    np.uint8)  # Make the padding black.
        img_gray = np.concatenate((img_gray, additional_lines),
                                  axis=0)  # Add the padding.

    if img_width < STANDARD_IMG_DIM:
        additional_lines_w = STANDARD_IMG_DIM - img_width  # Calculate the needed padding.
        additional_lines = np.zeros((STANDARD_IMG_DIM, additional_lines_w),
                                    np.uint8)  # Make the padding black.
        img_gray = np.concatenate((img_gray, additional_lines),
                                  axis=1)  # Add the padding.

    img_height, img_width = img_gray.shape  # Get the NEW dimensions of the original image.
    img_name = img_name + '.png'
    img_name = os.path.join(images_folder, img_name)

    # Save the non-transparent original image into the dataset.
    cv2.imwrite(img_name, ResizeWithAspectRatio(img_gray, width=2048))

    # Construct a black image with the same dimensions.
    black_image = np.zeros((img_height, img_width), np.uint8)

    # Iterate over found elements.
    construct_output(indent_level=2, message="***Creating masks.***")
    for index, element_position in enumerate(element_positions):
        construct_output(indent_level=2,
                         message="Creating mask {}.".format(index))
        # Generate a copy of the black image.
        temp_black_img = deepcopy(black_image)
        # Get the individual element position.
        y_start, y_end = element_position[0]
        x_start, x_end = element_position[1]
        # Paint a white rectangle over the element position on the mask.
        # (Writing 255 directly avoids mutating img_gray through a slice view.)
        temp_black_img[y_start:y_end, x_start:x_end] = 255

        # Generate mask name
        # (mask being the black image containing a single white rectangle in the place of the element).
        mask_name = 'mask' + str(index) + '.png'
        mask_name = os.path.join(masks_folder, mask_name)
        # Save the mask.
        cv2.imwrite(mask_name, ResizeWithAspectRatio(temp_black_img,
                                                     width=2048))
        construct_output(indent_level=3,
                         message="Mask {} saved into {}.".format(
                             'mask' + str(index) + '.png', masks_folder))
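As a side note, the two np.concatenate padding steps above can be collapsed into a single cv2.copyMakeBorder call; a sketch using the same variable names as generate_train_element:

import cv2
pad_bottom = max(0, STANDARD_IMG_DIM - img_height)
pad_right = max(0, STANDARD_IMG_DIM - img_width)
img_gray = cv2.copyMakeBorder(img_gray, 0, pad_bottom, 0, pad_right,
                              cv2.BORDER_CONSTANT, value=0)  # Black padding.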