def import_hvf_list_from_spreadsheet(delimited_string):
    """Parse a delimited spreadsheet string into a dict of HVF objects.

    The spreadsheet is assumed to be well-formed (column names match the
    metadata keys, first row is the header) -- no error checking is done.

    Returns:
        dict mapping file_name -> Hvf_Object.
    """
    hvf_objs_by_filename = {}

    # Slurp the entire string into a list of per-row dictionaries so each
    # row can be processed independently:
    rows = Hvf_Export.slurp_string_to_dict_list(delimited_string)

    for row in rows:
        file_name = row.pop('file_name')

        Logger.get_logger().log_msg(
            Logger.DEBUG_FLAG_SYSTEM,
            "Reading data for {}".format(file_name))

        hvf_objs_by_filename[file_name] = Hvf_Export.get_hvf_object_from_line(row)

    return hvf_objs_by_filename
def export_hvf_list_to_spreadsheet(dict_of_hvf):
    """Serialize a dict of {file_name: Hvf_Object} to a delimited string.

    Columns are: file name, metadata (ordered by Hvf_Object's key list to
    maintain order/completeness), then 100 columns each for the raw, total
    deviation value/percentile and pattern deviation value/percentile plots.

    Returns:
        A newline-joined string; the first line is the column header row.
    """
    metadata_header_list = Hvf_Object.METADATA_KEY_LIST.copy()

    # Plot headers are hard-coded by index -- not elegant, but simple:
    plot_size = 100
    raw_val_list = ["raw" + str(i) for i in range(plot_size)]
    tdv_list = ["tdv" + str(i) for i in range(plot_size)]
    tdp_list = ["tdp" + str(i) for i in range(plot_size)]
    pdv_list = ["pdv" + str(i) for i in range(plot_size)]
    pdp_list = ["pdp" + str(i) for i in range(plot_size)]

    # Construct the full header row:
    headers_list = (['file_name'] + metadata_header_list + raw_val_list +
                    tdv_list + tdp_list + pdv_list + pdp_list)

    # Accumulate output lines, starting with the header row:
    string_list = [Hvf_Export.CELL_DELIMITER.join(headers_list)]

    # Convert each HVF object to one delimited data row:
    for file_name, hvf_obj in dict_of_hvf.items():

        Logger.get_logger().log_msg(Logger.DEBUG_FLAG_SYSTEM,
                                    "Converting File {}".format(file_name))

        hvf_obj_line = (file_name + Hvf_Export.CELL_DELIMITER +
                        Hvf_Export.convert_hvf_obj_to_delimited_string(
                            hvf_obj, metadata_header_list,
                            Hvf_Export.CELL_DELIMITER))
        #hvf_obj_line = Regex_Utils.clean_nonascii(hvf_obj_line)

        string_list.append(hvf_obj_line)

    return "\n".join(string_list)
def chop_into_char_list(slice):
    """Split a plot-cell image into per-character sub-images, left to right.

    Contours are detected and sorted left-to-right; any contour wide enough
    to plausibly hold several digits is cut into equal-width pieces based on
    an empirical digit width/height ratio.

    Args:
        slice: binarized (white background) image of one cell's characters.

    Returns:
        List of image slices, one per detected character, left to right.
    """
    # Maximum width/height ratio for a single character; wider contours are
    # assumed to contain multiple digits:
    MAX_W_H_RATIO = 0.7

    ret_list = []

    slice_h = np.size(slice, 0)

    # findContours expects white-on-black, so invert first:
    slice_temp = cv2.bitwise_not(slice)
    cnts, hierarchy = cv2.findContours(slice_temp, cv2.RETR_EXTERNAL,
                                       cv2.CHAIN_APPROX_SIMPLE)

    # Sort contours from left to right:
    cnts = sorted(cnts, key=Hvf_Value.contour_x_dim)

    # Iterate through the contours:
    for cnt in cnts:

        # Get bounding box:
        x, y, w, h = cv2.boundingRect(cnt)

        # The contour may contain multiple digits, so need to detect it:
        if (w / slice_h > MAX_W_H_RATIO):

            Logger.get_logger().log_msg(Logger.DEBUG_FLAG_DEBUG,
                                        "Multiple Digits")

            # Empirical width/height ratio of a single digit:
            DIGIT_WH_RATIO = 0.575

            # Multiple digits
            expected_num_chars = max(
                round(((w / slice_h) / DIGIT_WH_RATIO) + 0.15), 1)

            # BUGFIX: this inner loop previously reused the outer loop
            # variable `ii`, clobbering it; use a distinct index.
            for jj in range(expected_num_chars):
                x_coor = x + (int(w * jj / expected_num_chars))
                x_size = int(w / expected_num_chars)

                # Slice out this digit and append it:
                ret_list.append(slice[:, x_coor:x_coor + x_size])

        else:
            # Single character -- slice it out directly:
            ret_list.append(slice[:, x:x + w])

    Logger.get_logger().log_msg(Logger.DEBUG_FLAG_DEBUG,
                                "Showing Element " + str(Hvf_Value.i))
    Logger.get_logger().log_msg(
        Logger.DEBUG_FLAG_DEBUG,
        "Number of elements: " + str(len(ret_list)))

    for ii in range(len(ret_list)):
        # BUGFIX: bind loop values as lambda defaults; otherwise these
        # deferred debug callbacks late-bind `ii` and would all display
        # only the last element when eventually invoked.
        show_element_func = (lambda ii=ii, elem=ret_list[ii]: cv2.imshow(
            'Element ' + str(Hvf_Value.i) + '.' + str(ii), elem))
        Logger.get_logger().log_function(Logger.DEBUG_FLAG_DEBUG,
                                         show_element_func)

    return ret_list
def get_dict_of_hvf_objs_from_imgs(directory):
    """Load every HVF report image in `directory` into an Hvf_Object.

    Returns:
        dict mapping image file name -> Hvf_Object.
    """
    # Image extensions we accept:
    image_extensions = [".bmp", ".jpg", ".jpeg", ".png"]
    img_paths = File_Utils.get_files_within_dir(directory, image_extensions)

    objs_by_filename = {}

    for img_path in img_paths:
        _, filename = os.path.split(img_path)

        Logger.get_logger().log_msg(Logger.DEBUG_FLAG_SYSTEM,
                                    "Reading HVF image " + filename)

        hvf_img = File_Utils.read_image_from_file(img_path)
        hvf_obj = Hvf_Object.get_hvf_object_from_image(hvf_img)

        # Drop the cached source image to keep memory usage down:
        hvf_obj.release_saved_image()

        objs_by_filename[filename] = hvf_obj

    return objs_by_filename
def get_dict_of_hvf_objs_from_text(directory):
    """Load every serialized HVF text file in `directory` into an Hvf_Object.

    Returns:
        dict mapping text file name -> Hvf_Object.
    """
    # Only plain-text serializations are accepted:
    text_extensions = [".txt"]
    txt_paths = File_Utils.get_files_within_dir(directory, text_extensions)

    objs_by_filename = {}

    for txt_path in txt_paths:
        _, filename = os.path.split(txt_path)

        Logger.get_logger().log_msg(Logger.DEBUG_FLAG_SYSTEM,
                                    "Reading HVF text file " + filename)

        hvf_txt = File_Utils.read_text_from_file(txt_path)
        objs_by_filename[filename] = Hvf_Object.get_hvf_object_from_text(
            hvf_txt)

    return objs_by_filename
def delete_stray_marks(image, global_threshold, relative_threshold):
    """Erase small noise contours from a binarized plot image.

    A contour is removed when its bounding-box area falls below
    `global_threshold` as a fraction of the full plot area, OR below
    `relative_threshold` as a fraction of the largest contour's area.

    Returns:
        The cleaned image, with original (white-background) polarity.
    """
    plot_area = np.size(image, 0) * np.size(image, 1)

    # Work on an inverted copy (findContours expects white-on-black):
    image_temp = cv2.bitwise_not(image.copy())
    cnts, hierarchy = cv2.findContours(image_temp, cv2.RETR_EXTERNAL,
                                       cv2.CHAIN_APPROX_SIMPLE)

    mask = np.ones(image_temp.shape[:2], dtype="uint8") * 255

    # Sort largest-first so cnts[0] gives the reference area for the
    # relative-size test:
    cnts = sorted(cnts, key=Image_Utils.contour_bound_box_area, reverse=True)

    largest_contour_area = 0
    if (len(cnts) > 0):
        largest_contour_area = Image_Utils.contour_bound_box_area(cnts[0])

    Logger.get_logger().log_msg(
        Logger.DEBUG_FLAG_DEBUG,
        "Looping through contours, length " + str(len(cnts)))

    contours_to_mask = []

    for c in cnts:
        # Bounding-box area of this contour.
        # Can also consider using cv2.contourArea(cnt);
        contour_area = Image_Utils.contour_bound_box_area(c)
        plot_fraction = contour_area / plot_area
        relative_fraction = contour_area / largest_contour_area

        Logger.get_logger().log_msg(
            Logger.DEBUG_FLAG_DEBUG,
            "Contour plot size fraction: " + str(plot_fraction) +
            "; contour relative size fraction: " + str(relative_fraction))

        # Too small by either criterion -> schedule for masking:
        if (plot_fraction < global_threshold
                or relative_fraction < relative_threshold):
            Logger.get_logger().log_msg(
                Logger.DEBUG_FLAG_DEBUG,
                "Found a small contour, masking out")
            contours_to_mask.append(c)

    cv2.drawContours(mask, contours_to_mask, -1, 0, -1)

    # Erase the masked contours and restore original polarity:
    image = cv2.bitwise_not(
        cv2.bitwise_and(image_temp, image_temp, mask=mask))

    return image
def test_single_image(hvf_image):
    """Extract one HVF image, then exercise the serialization round-trip
    and the metric calculator, printing results for manual inspection.

    Returns:
        An empty string (callers only need completion).
    """
    # Set up the logger module at system level for the extraction:
    debug_level = Logger.DEBUG_FLAG_SYSTEM
    #debug_level = Logger.DEBUG_FLAG_INFO;
    msg_logger = Logger.get_logger().set_logger_level(debug_level)

    # Instantiate hvf object, timing the extraction:
    Logger.get_logger().log_time("Single HVF image extraction time",
                                 Logger.TIME_START)
    hvf_obj = Hvf_Object.get_hvf_object_from_image(hvf_image)

    debug_level = Logger.DEBUG_FLAG_TIME
    msg_logger = Logger.get_logger().set_logger_level(debug_level)

    Logger.get_logger().log_time("Single HVF image extraction time",
                                 Logger.TIME_END)

    # Print the display strings:
    print(hvf_obj.get_pretty_string())

    # Serialize, then deserialize and re-serialize; the two serializations
    # must match for the round-trip to be consistent:
    serialization = hvf_obj.serialize_to_json()
    print(serialization)

    hvf_obj2 = Hvf_Object.get_hvf_object_from_text(serialization)
    serialization2 = hvf_obj2.serialize_to_json()

    if (serialization == serialization2):
        Logger.get_logger().log_msg(
            Logger.DEBUG_FLAG_SYSTEM,
            "Passed serialization/deserialization consistency")

        # Check to see if we can release saved images without error:
        hvf_obj.release_saved_image()
        hvf_obj2.release_saved_image()

        Logger.get_logger().log_msg(Logger.DEBUG_FLAG_SYSTEM,
                                    "Passed releasing saved images")
    else:
        Logger.get_logger().log_msg(
            Logger.DEBUG_FLAG_SYSTEM,
            "FAILED serialization/deserialization consistency =============="
        )
        print(serialization)
        Logger.get_logger().log_msg(Logger.DEBUG_FLAG_SYSTEM, "=====")
        print(serialization2)

        # Check to see if we can release saved images without error:
        hvf_obj.release_saved_image()
        hvf_obj2.release_saved_image()

        #for line in difflib.unified_diff(serialization, serialization2, lineterm=''):
        #    print(line);

    # Test HVF Metric calculator:
    Logger.get_logger().log_msg(
        Logger.DEBUG_FLAG_SYSTEM, "Global CIGTS TDP Score: " +
        str(Hvf_Metric_Calculator.get_global_cigts_tdp_score(hvf_obj)))

    if hvf_obj.pat_dev_percentile_array.is_pattern_not_generated():
        # PDP score is undefined when the machine did not generate the
        # pattern deviation plot:
        pdp_cigts = "Cannot calculate as pattern not generated"
    else:
        pdp_cigts = str(
            Hvf_Metric_Calculator.get_global_cigts_pdp_score(hvf_obj))
    Logger.get_logger().log_msg(Logger.DEBUG_FLAG_SYSTEM,
                                "Global CIGTS PDP Score: " + pdp_cigts)

    # Need to have wait for window instantiation IF the code generates
    # frames - it does when debugging. Comment for now
    #cv2.waitKey(0);

    cv2.destroyAllWindows()

    return ""
def _log_error_rate_section(debug_level, total_label, errors_label,
                            rate_label, total, errors):
    # Log one category's field count, error count and error rate; guards
    # against division by zero when the category has no fields.
    error_rate = round(errors / total, 3) if total else 0.0
    Logger.get_logger().log_msg(debug_level, total_label + str(total))
    Logger.get_logger().log_msg(debug_level, errors_label + str(errors))
    Logger.get_logger().log_msg(debug_level, rate_label + str(error_rate))


def print_unit_test_aggregate_metrics(testing_data_list):
    """Log aggregate metrics for a completed unit-test run.

    Reports: total number of tests, average extraction time, and the
    count/error-count/error-rate for the metadata, value-plot and
    percentile-plot categories.

    Args:
        testing_data_list: list of per-test dicts with keys "time",
            "metadata_vals"/"metadata_errors", "value_plot_vals"/
            "value_plot_errors", "perc_plot_vals"/"perc_plot_errors".

    Returns:
        An empty string.
    """
    debug_level = Logger.DEBUG_FLAG_SYSTEM

    Logger.get_logger().log_msg(
        debug_level,
        "================================================================================"
    )
    Logger.get_logger().log_msg(debug_level, "UNIT TEST AGGREGATE METRICS:")

    num_tests = len(testing_data_list)
    Logger.get_logger().log_msg(debug_level,
                                "Total number of tests: " + str(num_tests))

    # BUGFIX: guard against ZeroDivisionError on an empty test list:
    list_of_times = [x["time"] for x in testing_data_list]
    average_time = (round(sum(list_of_times) / len(list_of_times))
                    if list_of_times else 0)

    if (average_time > 0):
        Logger.get_logger().log_msg(
            debug_level, "Average extraction time per report: " +
            str(average_time) + "ms")

    Logger.get_logger().log_msg(debug_level, "")

    # Metadata fields:
    metadata_total = sum(x["metadata_vals"] for x in testing_data_list)
    metadata_errors = sum(
        len(x["metadata_errors"]) for x in testing_data_list)
    _log_error_rate_section(debug_level,
                            "Total number of metadata fields: ",
                            "Total number of metadata field errors: ",
                            "Metadata field error rate: ", metadata_total,
                            metadata_errors)

    Logger.get_logger().log_msg(debug_level, "")

    # Value plot data points:
    value_total = sum(x["value_plot_vals"] for x in testing_data_list)
    value_errors = sum(
        len(x["value_plot_errors"]) for x in testing_data_list)
    _log_error_rate_section(debug_level,
                            "Total number of value data points: ",
                            "Total number of value data point errors: ",
                            "Value data point error rate: ", value_total,
                            value_errors)

    Logger.get_logger().log_msg(debug_level, "")

    # Percentile plot data points:
    perc_total = sum(x["perc_plot_vals"] for x in testing_data_list)
    perc_errors = sum(len(x["perc_plot_errors"]) for x in testing_data_list)
    _log_error_rate_section(
        debug_level, "Total number of percentile data points: ",
        "Total number of percentile data point errors: ",
        "Percentile data point error rate: ", perc_total, perc_errors)

    return ""
def add_unit_test(test_name, test_type, ref_data_path, test_data_path):
    """Register a new unit test by copying an input file and its reference
    (expected-output) file into the unit-test directory tree.

    Layout created: master / test_type / test_name / {test dir, reference
    dir}; any missing directory along the path is created.

    Args:
        test_name: name of the test grouping (directory under test_type).
        test_type: test category (directory under the master test dir).
        ref_data_path: path to the reference (expected-output) file.
        test_data_path: path to the input test file.

    Returns:
        An empty string.
    """
    # Set up the logger module:
    debug_level = Logger.DEBUG_FLAG_ERROR
    msg_logger = Logger.get_logger().set_logger_level(debug_level)

    # Build the directory hierarchy paths:
    master_path = Hvf_Test.UNIT_TEST_MASTER_PATH
    test_type_path = os.path.join(master_path, test_type)
    test_name_path = os.path.join(master_path, test_type, test_name)

    # BUGFIX: these previously referenced an undefined `test_dir_path`
    # (NameError) and clobbered the `test_data_path` argument; use distinct
    # destination-directory names instead.
    test_dir_dest = os.path.join(test_name_path, Hvf_Test.UNIT_TEST_TEST_DIR)
    reference_dir_dest = os.path.join(test_name_path,
                                      Hvf_Test.UNIT_TEST_REFERENCE_DIR)

    # Create any missing directories, parent-first:
    for create_path_if_not_present in (master_path, test_type_path,
                                       test_name_path, test_dir_dest,
                                       reference_dir_dest):
        if not os.path.isdir(create_path_if_not_present):
            Logger.get_logger().log_msg(
                Logger.DEBUG_FLAG_SYSTEM,
                "Making new unit test directory: " +
                create_path_if_not_present)
            os.mkdir(create_path_if_not_present)

    # First, get file names:
    ref_path, ref_filename = os.path.split(ref_data_path)
    test_path, test_filename = os.path.split(test_data_path)

    # Make sure that filenames share a root, because the test runner pairs
    # files by their common file-name root:
    ref_filename_root, ref_ext = os.path.splitext(ref_filename)
    test_filename_root, test_ext = os.path.splitext(test_filename)

    if not (test_filename_root == ref_filename_root):
        # BUGFIX: os.path.splitext's extension already includes the leading
        # dot, so don't add another one (previously produced "name..ext").
        ref_filename = test_filename_root + ref_ext
        Logger.get_logger().log_msg(
            Logger.DEBUG_FLAG_SYSTEM,
            "Renaming reference file to {} to match with test file".format(
                ref_filename))

    # Save the files:
    copyfile(ref_data_path, os.path.join(reference_dir_dest, ref_filename))
    # BUGFIX: previously both source and destination of this copy were
    # derived from the (clobbered) destination directory; copy the caller's
    # input file into the test directory.
    copyfile(test_data_path, os.path.join(test_dir_dest, test_filename))

    Logger.get_logger().log_msg(
        Logger.DEBUG_FLAG_SYSTEM,
        "Added unit test - TYPE: {}, NAME: {}".format(
            test_type, test_filename_root))

    return ""
def get_value_plot_element(plot_element, plot_element_backup, plot_type):
    """Read the numeric value shown in one plot cell image.

    Args:
        plot_element: binarized image slice of one cell.
        plot_element_backup: alternatively-thresholded copy of the same
            cell, used to re-check digits with a poor template match.
        plot_type: plot kind (e.g. "raw"); selects noise thresholds and
            which special cases (less-than sign, minus sign) can occur.

    Returns:
        The detected integer value, or the Hvf_Value.VALUE_NO_VALUE /
        Hvf_Value.VALUE_BELOW_THRESHOLD sentinel values.
    """
    # Declare return value
    return_val = 0

    # CV2 just slices images and returns the native image. We mess with the
    # pixels so for cleanliness, just copy it over:
    plot_element = plot_element.copy()

    # First, clean up any small noisy pixels by eliminating small contours.
    # Tolerance for stray marks is different depending on plot type:
    plot_threshold = 0
    relative_threshold = 0

    if (plot_type == "raw"):
        plot_threshold = 0.005
        relative_threshold = 0.1
    else:
        plot_threshold = 0.005
        relative_threshold = 0.01

    plot_element = Image_Utils.delete_stray_marks(plot_element,
                                                  plot_threshold,
                                                  relative_threshold)
    plot_element_backup = Image_Utils.delete_stray_marks(
        plot_element_backup, plot_threshold, relative_threshold)

    # Now, crop out the borders so we just have the central values - this
    # allows us to standardize size:
    x0, x1, y0, y1 = Image_Utils.crop_white_border(plot_element)

    # Calculate height and width:
    h = y1 - y0
    w = x1 - x0

    # Sometimes in low quality images, empty cells may have noise - could
    # also filter based on area of element:
    #THRESHOLD_AREA_FRACTION = 0.03;
    #fraction_element_area = (w*h)/(np.size(plot_element, 0)*np.size(plot_element, 1));
    #if ((w <= 0) or (h <= 0) or fraction_element_area < THRESHOLD_AREA_FRACTION):

    # If this was an empty plot, we have no value:
    if (w <= 0) or (h <= 0):
        Logger.get_logger().log_msg(
            Logger.DEBUG_FLAG_DEBUG,
            "Declaring no value because cell is empty/below threshold marks"
        )
        return_val = Hvf_Value.VALUE_NO_VALUE
        Hvf_Value.i = Hvf_Value.i + 1
    else:
        # First, split the slice into a character list:
        list_of_chars = Hvf_Value.chop_into_char_list(
            plot_element[y0:1 + y1, x0:1 + x1])
        # BUGFIX: this previously re-chopped the primary image, making the
        # backup re-check below a no-op; chop the backup image instead.
        list_of_chars_backup = Hvf_Value.chop_into_char_list(
            plot_element_backup[y0:1 + y1, x0:1 + x1])

        # Check for special cases (ie, non-numeric characters).
        # Check if <0 value; can optimize detection accuracy by limiting
        # check to only raw plot values with 2 chars:
        if (plot_type == "raw" and len(list_of_chars) == 2
                and (Hvf_Value.is_less_than(list_of_chars[0]) or
                     (len(list_of_chars_backup) > 0
                      and Hvf_Value.is_less_than(list_of_chars_backup[0])))):
            Logger.get_logger().log_msg(Logger.DEBUG_FLAG_DEBUG,
                                        "Detected less-than sign")
            return_val = Hvf_Value.VALUE_BELOW_THRESHOLD

        # Check if the above detection worked:
        if (return_val == 0):
            # No, so continue detection for number.
            # Multiplier factor makes later numeric correction easier:
            is_minus = 1

            # First, look for minus sign - if we have 2 or 3 characters.
            # Negative numbers are not present in raw plot:
            if not (plot_type == "raw"):
                if (len(list_of_chars) == 2
                        and Hvf_Value.is_minus(list_of_chars[0])):
                    # Detected minus sign:
                    Logger.get_logger().log_msg(Logger.DEBUG_FLAG_DEBUG,
                                                "Detected minus sign")
                    is_minus = -1
                    # Remove the character from both lists (guard the
                    # backup, which may have chopped differently):
                    list_of_chars.pop(0)
                    if list_of_chars_backup:
                        list_of_chars_backup.pop(0)
                elif (len(list_of_chars) == 3):
                    # We know there must be a minus sign, so just raise flag
                    Logger.get_logger().log_msg(Logger.DEBUG_FLAG_DEBUG,
                                                "Assuming minus sign")
                    is_minus = -1
                    list_of_chars.pop(0)
                    if list_of_chars_backup:
                        list_of_chars_backup.pop(0)

            # Now, look for digits, and calculate running value:
            running_value = 0

            for jj in range(len(list_of_chars)):

                # Pull out our digit to detect, and clean it:
                digit = Hvf_Value.clean_slice(list_of_chars[jj])

                # BUGFIX: bind loop values as lambda defaults so deferred
                # debug callbacks show this iteration's digit (late-binding
                # closure fix).
                show_element_func = (lambda jj=jj, digit=digit: cv2.imshow(
                    'Sub element ' + str(Hvf_Value.i) + "_" + str(jj),
                    digit))
                Logger.get_logger().log_function(Logger.DEBUG_FLAG_DEBUG,
                                                 show_element_func)

                Hvf_Value.j = Hvf_Value.j + 1

                # Search for 0 if it is the trailing 0 of a multi-digit
                # number, or if lone digit and not a minus:
                allow_search_zero = ((jj == len(list_of_chars) - 1) and
                                     (len(list_of_chars) > 1)) or (
                                         (len(list_of_chars) == 1) and
                                         (is_minus == 1))

                Logger.get_logger().log_msg(
                    Logger.DEBUG_FLAG_DEBUG,
                    "Allow 0 search: " + str(allow_search_zero))
                Logger.get_logger().log_msg(Logger.DEBUG_FLAG_DEBUG,
                                            "jj: " + str(jj))
                Logger.get_logger().log_msg(
                    Logger.DEBUG_FLAG_DEBUG,
                    "list_of_chars length: " + str(len(list_of_chars)))

                best_value, best_loc, best_scale_factor, best_match = Hvf_Value.identify_digit(
                    digit, allow_search_zero)

                # If not a good match, recheck with alternatively processed
                # image -> may increase yield (guard the backup index, as
                # the backup may have chopped into fewer characters):
                threshold_match_digit = 0.5
                if (best_match > 0 and best_match < threshold_match_digit
                        and jj < len(list_of_chars_backup)):
                    digit_backup = Hvf_Value.clean_slice(
                        list_of_chars_backup[jj])
                    best_value, best_loc, best_scale_factor, best_match = Hvf_Value.identify_digit(
                        digit_backup, allow_search_zero)

                running_value = (10 * running_value) + best_value

            Hvf_Value.i = Hvf_Value.i + 1
            Hvf_Value.j = 0

            return_val = running_value * is_minus

    # Debug info string for the best matched value:
    debug_best_match_string = "Best matched value: " + Hvf_Value.get_string_from_value(
        return_val)
    Logger.get_logger().log_msg(Logger.DEBUG_FLAG_INFO,
                                debug_best_match_string)

    return return_val
def identify_digit(plot_element, allow_search_zero):
    """Identify which digit a character sub-image contains via template
    matching.

    Every digit icon template (all orientations/variants per digit) is
    matched against the height-normalized element using normalized
    cross-correlation; the best score wins. A post-pass disambiguates the
    commonly-confused 1 vs 4 case using the bottom-half contour width.

    Args:
        plot_element: binarized image of a single character.
        allow_search_zero: when False, template 0 is skipped -- used when
            context rules out a zero, improving accuracy in low-res cases.

    Returns:
        (best_val, best_loc, best_scale_factor, best_match): the detected
        digit, its match location, the scale factor applied, and the
        correlation score of the winning match.
    """
    # We template match against all icons and look for best fit:
    best_match = None
    best_val = None
    best_loc = None
    best_scale_factor = None

    height = np.size(plot_element, 0)
    width = np.size(plot_element, 1)

    # Can skip 0 if flag tells us to. This can help maximize accuracy in
    # low-res cases. Do this when we know something about the digit (it is
    # a leading digit, etc):
    start_index = 0
    if not allow_search_zero:
        start_index = 1

    for ii in range(start_index, len(Hvf_Value.value_icon_templates.keys())):
        for dir in Hvf_Value.value_icon_templates[ii]:

            # First, scale our template value:
            val_icon = Hvf_Value.value_icon_templates[ii][dir]
            plot_element_temp = plot_element.copy()

            scale_factor = 1

            # Upscale whichever image is shorter so both share a height;
            # use the smaller factor to make sure we fit into the element
            # icon:
            if (height < np.size(val_icon, 0)):
                # Need to upscale plot_element
                scale_factor = np.size(val_icon, 0) / height
                plot_element_temp = cv2.resize(plot_element_temp, (0, 0),
                                               fx=scale_factor,
                                               fy=scale_factor)
            else:
                # Need to upscale val_icon
                scale_factor = height / (np.size(val_icon, 0))
                val_icon = cv2.resize(val_icon, (0, 0),
                                      fx=scale_factor,
                                      fy=scale_factor)

            # In case the original is too small by width compared to
            # value_icon, need to widen - do so by copymakeborder replicate.
            # NOTE(review): the widening call is commented out, so `border`
            # is currently unused and matchTemplate can still fail when the
            # element is narrower than the icon -- confirm intent.
            if (np.size(plot_element_temp, 1) < np.size(val_icon, 1)):
                border = np.size(val_icon, 1) - np.size(
                    plot_element_temp, 1)
                #plot_element_temp = cv2.copyMakeBorder(plot_element_temp,0,0,0,border,cv2.BORDER_CONSTANT,0);

            # Apply template matching:
            temp_matching = cv2.matchTemplate(plot_element_temp, val_icon,
                                              cv2.TM_CCOEFF_NORMED)

            # Grab our result
            min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(
                temp_matching)

            Logger.get_logger().log_msg(
                Logger.DEBUG_FLAG_DEBUG,
                "Matching against " + str(ii) + ": " + str(max_val))

            # Check to see if this is our best fit yet:
            if (best_match is None or max_val > best_match):
                # This is best fit - record the match value and the actual
                # value:
                best_match = max_val
                best_val = ii
                best_loc = max_loc
                best_scale_factor = scale_factor

    # TODO: refine specific cases that tend to be misclassified
    # 1 vs 4
    if (best_val == 4 or best_val == 1):
        # Cut number in half and take bottom half -> find contours
        # If width of contour is most of element --> 4
        # otherwise, 1
        bottom_half = Image_Utils.slice_image(plot_element, 0.5, 0.5, 0, 1)

        cnts, hierarchy = cv2.findContours(
            cv2.bitwise_not(bottom_half.copy()), cv2.RETR_EXTERNAL,
            cv2.CHAIN_APPROX_SIMPLE)

        # Sort contours by width
        largest_contour = sorted(cnts,
                                 key=Hvf_Value.contour_width,
                                 reverse=True)[0]

        if (Hvf_Value.contour_width(largest_contour) > width * 0.8):
            best_val = 4
        else:
            best_val = 1

    return best_val, best_loc, best_scale_factor, best_match
def find_and_delete_triangle_icon(plot_image, triangle_version):
    """Find the triangle marker icon in a raw plot and white it out.

    The icon template ("v1" or "v2") is scaled relative to the plot width,
    template-matched, and -- on a sufficiently strong match -- erased with a
    white rectangle that is extended downward over any residual pixels.

    Args:
        plot_image: binarized (white background) plot image; modified
            in place when the icon is found.
        triangle_version: "v1" selects the v1 template, anything else v2.

    Returns:
        True if the icon was found and deleted, False otherwise.
    """
    # Empirical icon-width : plot-width ratio, and minimum match score:
    TRIANGLE_TO_PLOT_RATIO_W = 0.0305
    THRESHOLD_MATCH = 0.6

    # First, copy and resize the template icon:
    if (triangle_version == "v1"):
        triangle_icon = Hvf_Plot_Array.triangle_icon_template_v1.copy()
    else:  #if v2
        triangle_icon = Hvf_Plot_Array.triangle_icon_template_v2.copy()

    scale_factor = (np.size(plot_image, 1) *
                    TRIANGLE_TO_PLOT_RATIO_W) / np.size(triangle_icon, 1)
    triangle_icon = cv2.resize(triangle_icon, (0, 0),
                               fx=scale_factor,
                               fy=scale_factor)

    # Template match to find icon:
    temp_matching = cv2.matchTemplate(plot_image, triangle_icon,
                                      cv2.TM_CCOEFF_NORMED)

    # Grab our result
    min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(temp_matching)

    # If we have a match, white out the triangle area:
    if (max_val > THRESHOLD_MATCH):
        Logger.get_logger().log_msg(Logger.DEBUG_FLAG_INFO,
                                    "Found triangle icon, deleting it")

        # Get our corners:
        top_left = max_loc
        bottom_right = (max_loc[0] + np.size(triangle_icon, 1),
                        max_loc[1] + np.size(triangle_icon, 0))

        # Declare some pertinent values for the bottom edge of the matching:
        x_start = top_left[0]
        x_end = bottom_right[0]
        row_index = bottom_right[1]

        # We will lengthen the bottom edge until some percentage of pixels
        # start appearing white. Need to calculate this threshold:
        num_pix = x_end - x_start
        PERCENTAGE_THRESHOLD = .10
        WHITE_PIXEL_VALUE = 255

        threshold_pixel_value = int(PERCENTAGE_THRESHOLD * num_pix *
                                    WHITE_PIXEL_VALUE)

        Logger.get_logger().log_msg(
            Logger.DEBUG_FLAG_DEBUG,
            "Number of pixel at border: ({}, {}) => {}".format(
                str(x_start), str(x_end), str(num_pix)))
        Logger.get_logger().log_msg(
            Logger.DEBUG_FLAG_DEBUG,
            "Threshold pixel value: " + str(threshold_pixel_value))

        # The bottom line tends to be problematic (still some residual left
        # after erasing matching icon) so manually look for residual.
        # BUGFIX: bound the walk so it cannot index past the bottom row of
        # the image (previously an unbounded `while True`).
        max_row = np.size(plot_image, 0) - 1
        while row_index < max_row:
            sum_pixels = sum(plot_image[row_index, x_start:x_end])
            Logger.get_logger().log_msg(Logger.DEBUG_FLAG_DEBUG,
                                        "Sum pixels: " + str(sum_pixels))
            if (sum_pixels < threshold_pixel_value):
                Logger.get_logger().log_msg(
                    Logger.DEBUG_FLAG_INFO,
                    "Lengthening triangle box to cover residual")
                row_index = row_index + 1
            else:
                break

        cv2.rectangle(plot_image, top_left, (x_end, row_index), (255), -1)
        #cv2.rectangle(plot_image,max_loc,(x_end, row_index),(0), 1)

        return True

    else:
        Logger.get_logger().log_msg(
            Logger.DEBUG_FLAG_INFO,
            "Did not find triangle icon, matching value " + str(max_val))
        return False
"Reading HVF text file " + filename) hvf_txt = File_Utils.read_text_from_file(hvf_txt_path) hvf_obj = Hvf_Object.get_hvf_object_from_text(hvf_txt) dict_of_hvf_objs[filename] = hvf_obj return dict_of_hvf_objs ############################################################################### # BULK PROCESSING ############################################################# ############################################################################### Logger.set_logger_level(Logger.DEBUG_FLAG_SYSTEM) # If flag, then do unit tests: if (args["image_directory"]): # Grab the argument directory for readability directory = args["image_directory"] dict_of_hvf_objs = get_dict_of_hvf_objs_from_imgs(directory) return_string = Hvf_Export.export_hvf_list_to_spreadsheet(dict_of_hvf_objs) File_Utils.write_string_to_file(return_string, "output_spreadsheet.tsv") elif (args["text_directory"]):
def extract_values_from_plot(plot_image, plot_type, icon_type):
    """Slice a plot image into its 10x10 grid and detect each cell's
    element (percentile icon or numeric value).

    Args:
        plot_image: grayscale image of the whole plot.
        plot_type: which plot this is (e.g. Hvf_Plot_Array.PLOT_RAW);
            triangle-icon deletion only applies to the raw plot.
        icon_type: Hvf_Plot_Array.PLOT_PERC or PLOT_VALUE; selects both
            the thresholding strategy and the per-cell detector.

    Returns:
        numpy object array of shape (cols, rows) holding Hvf_Perc_Icon or
        Hvf_Value instances.

    Raises:
        Exception: re-raised when a cell's detection fails.
    """
    # First, image process for best readability:
    #plot_image = cv2.GaussianBlur(plot_image, (5,5), 0)

    plot_image_backup = plot_image.copy()

    # Perform image processing depending on plot type:
    if (icon_type == Hvf_Plot_Array.PLOT_PERC):
        plot_image = cv2.bitwise_not(
            cv2.adaptiveThreshold(plot_image, 255,
                                  cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                  cv2.THRESH_BINARY_INV, 11, 5))
    elif (icon_type == Hvf_Plot_Array.PLOT_VALUE):
        #plot_image = cv2.GaussianBlur(plot_image, (5,5), 0)
        ret2, plot_image = cv2.threshold(
            plot_image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

        # The backup copy is thresholded adaptively instead; the value
        # detector uses it as a fallback for poor matches:
        kernel_size = 31
        mean_offset = 15
        plot_image_backup = cv2.bitwise_not(
            cv2.adaptiveThreshold(plot_image_backup, 255,
                                  cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                  cv2.THRESH_BINARY_INV, kernel_size,
                                  mean_offset))

    # For readability, grab our height/width:
    plot_width = np.size(plot_image, 1)
    plot_height = np.size(plot_image, 0)

    # The elements are laid out roughly within a 10x10 grid:
    NUM_CELLS_ROW = Hvf_Plot_Array.NUM_OF_PLOT_ROWS
    NUM_CELLS_COL = Hvf_Plot_Array.NUM_OF_PLOT_COLS

    # Delete triangle icon, if we can find it (raw plot only); try template
    # v2 when v1 does not match:
    if (plot_type == Hvf_Plot_Array.PLOT_RAW):
        if not (Hvf_Plot_Array.find_and_delete_triangle_icon(
                plot_image, "v1")):
            Hvf_Plot_Array.find_and_delete_triangle_icon(plot_image, "v2")

    # Mask out corners:
    corner_mask = Hvf_Plot_Array.generate_corner_mask(
        plot_width, plot_height)
    plot_image = cv2.bitwise_or(plot_image, cv2.bitwise_not(corner_mask))

    # Declare our return value array; every cell is overwritten below:
    plot_values_array = 0
    if (icon_type == Hvf_Plot_Array.PLOT_PERC):
        plot_values_array = np.zeros((NUM_CELLS_COL, NUM_CELLS_ROW),
                                     dtype=Hvf_Perc_Icon)
    elif (icon_type == Hvf_Plot_Array.PLOT_VALUE):
        plot_values_array = np.zeros((NUM_CELLS_COL, NUM_CELLS_ROW),
                                     dtype=Hvf_Value)

    plot_image = Hvf_Plot_Array.delete_plot_axes(plot_image)

    # Grab the grid lines:
    grid_line_dict = Hvf_Plot_Array.get_plot_grid_lines(
        plot_image, plot_type, icon_type)

    plot_image_debug_copy = plot_image.copy()

    # Debug code - draws out slicing for the elements on the plot:
    for c in range(Hvf_Plot_Array.NUM_OF_PLOT_COLS + 1):
        x = int(grid_line_dict['col_list'][c] * plot_width)
        #cv2.line(plot_image_debug_copy, (x, 0), (x, plot_height), (0), 1);

    for r in range(Hvf_Plot_Array.NUM_OF_PLOT_ROWS + 1):
        y = int(grid_line_dict['row_list'][r] * plot_height)
        #cv2.line(plot_image_debug_copy, (0, y), (plot_width, y), (0), 1);

    # Debug function for showing the plot:
    show_plot_func = (
        lambda: cv2.imshow("plot " + icon_type, plot_image_debug_copy))
    Logger.get_logger().log_function(Logger.DEBUG_FLAG_DEBUG, show_plot_func)
    #cv2.imshow("plot " + icon_type, plot_image_debug_copy)
    #cv2.waitKey();

    # We iterate through our array, then slice out the appropriate cell
    # from the plot:
    for x in range(0, NUM_CELLS_COL):
        for y in range(0, NUM_CELLS_ROW):

            # Debug info for indicating what cell we're computing:
            Logger.get_logger().log_msg(Logger.DEBUG_FLAG_INFO,
                                        "Cell " + str(x) + "," + str(y))

            # Grab our cell slice for the plot element
            # (arguments: slice_image(image, y_ratio, y_size, x_ratio,
            # x_size)). The axes tend to extend slightly past the elements,
            # which the grid-line ratios already account for.
            row_grid_val = grid_line_dict['row_list'][y]
            row_grid_val_size = grid_line_dict['row_list'][
                y + 1] - grid_line_dict['row_list'][y]
            col_grid_val = grid_line_dict['col_list'][x]
            col_grid_val_size = grid_line_dict['col_list'][
                x + 1] - grid_line_dict['col_list'][x]

            cell_slice = Image_Utils.slice_image(plot_image, row_grid_val,
                                                 row_grid_val_size,
                                                 col_grid_val,
                                                 col_grid_val_size)
            cell_slice_backup = Image_Utils.slice_image(
                plot_image_backup, row_grid_val, row_grid_val_size,
                col_grid_val, col_grid_val_size)

            cell_object = 0

            # Then, need to analyze to figure out what element is in this
            # position. Detection relies on many things going right, so it
            # runs under try-except; on failure a placeholder is recorded
            # before the error is re-raised.
            if (icon_type == Hvf_Plot_Array.PLOT_PERC):
                if (Hvf_Plot_Array.PLOT_ELEMENT_BOOLEAN_MASK[y][x]):
                    try:
                        cell_object = Hvf_Perc_Icon.get_perc_icon_from_image(
                            cell_slice)
                        Logger.get_logger().log_msg(
                            Logger.DEBUG_FLAG_INFO,
                            "Percentile Icon detected: " +
                            cell_object.get_display_string())
                    # BUGFIX: this was a bare `except:` whose handler then
                    # referenced an undefined `e`, so the re-raise produced
                    # a NameError that masked the original failure. Catch
                    # and name the exception, matching the VALUE branch.
                    except Exception as e:
                        Logger.get_logger().log_msg(
                            Logger.DEBUG_FLAG_WARNING,
                            "Cell " + str(x) + "," + str(y) +
                            ": Percentile icon detection failure")
                        cell_object = Hvf_Perc_Icon.get_perc_icon_from_char(
                            Hvf_Perc_Icon.PERC_FAILURE_CHAR)
                        raise Exception(str(e))
                else:
                    # This is a no-detect element, so just instantiate a
                    # blank:
                    cell_object = Hvf_Perc_Icon.get_perc_icon_from_char(
                        Hvf_Perc_Icon.PERC_NO_VALUE_CHAR)
                    Logger.get_logger().log_msg(
                        Logger.DEBUG_FLAG_INFO,
                        "Masking element - generating NO VALUE element")

            elif (icon_type == Hvf_Plot_Array.PLOT_VALUE):
                if (Hvf_Plot_Array.PLOT_ELEMENT_BOOLEAN_MASK[y][x]):
                    try:
                        cell_object = Hvf_Value.get_value_from_image(
                            cell_slice, cell_slice_backup, plot_type)
                        Logger.get_logger().log_msg(
                            Logger.DEBUG_FLAG_INFO, "Value detected: " +
                            cell_object.get_display_string())
                    except Exception as e:
                        Logger.get_logger().log_msg(
                            Logger.DEBUG_FLAG_WARNING,
                            "Cell " + str(x) + "," + str(y) +
                            ": Value detection failure")
                        cell_object = Hvf_Value.get_value_from_display_string(
                            Hvf_Value.VALUE_FAILURE)
                        raise Exception(str(e))
                else:
                    # This is a no-detect element, so just instantiate a
                    # blank:
                    cell_object = Hvf_Value.get_value_from_display_string(
                        Hvf_Value.VALUE_NO_VALUE)
                    Logger.get_logger().log_msg(
                        Logger.DEBUG_FLAG_INFO,
                        "Masking element - generating NO VALUE element")

            Logger.get_logger().log_msg(Logger.DEBUG_FLAG_INFO, "=====")

            # Lastly, store into array:
            plot_values_array[x, y] = cell_object

    wait_func = (lambda: cv2.waitKey(0))
    Logger.get_logger().log_function(Logger.DEBUG_FLAG_DEBUG, wait_func)

    destroy_windows_func = (lambda: cv2.destroyAllWindows())
    Logger.get_logger().log_function(Logger.DEBUG_FLAG_DEBUG,
                                     destroy_windows_func)

    # Return our array:
    return plot_values_array
def get_plot_grid_lines(plot_image, plot_type, icon_type):
    """Locate the grid lines that partition an HVF plot into its cell matrix.

    Extracts long horizontal/vertical line artifacts via morphological
    opening, then nudges a set of preliminary, evenly-spaced grid-line
    positions so each one falls into a blank (white) gap between plot
    elements rather than on top of plot content.

    Args:
        plot_image: binary plot image (white = blank space -- inferred from
            the `== 255` probe below; TODO confirm against caller)
        plot_type: plot type flag (not used here; kept for interface parity)
        icon_type: icon type flag (not used here; kept for interface parity)

    Returns:
        dict with keys 'row_list' and 'col_list': fractional (0..1) grid-line
        positions relative to plot height and width, respectively.
    """
    Logger.get_logger().log_msg(Logger.DEBUG_FLAG_INFO, "Finding grid lines")

    plot_w = np.size(plot_image, 1)
    plot_h = np.size(plot_image, 0)

    horizontal_img = plot_image.copy()
    vertical_img = plot_image.copy()

    # [Horizontal]
    # A structuring element spanning the full width keeps only long
    # horizontal line artifacts after opening:
    horizontal_size = horizontal_img.shape[1]
    horizontalStructure = cv2.getStructuringElement(
        cv2.MORPH_RECT, (horizontal_size, 1))
    horizontal_img = cv2.morphologyEx(horizontal_img, cv2.MORPH_OPEN,
                                      horizontalStructure, iterations=2)

    # Take a thin slice from the vertical middle of the plot; pad the ends so
    # blank-space contours touching the border still close:
    horizontal_slice = Image_Utils.slice_image(horizontal_img, 0, 1, 0.475,
                                               0.05)
    horizontal_slice = cv2.copyMakeBorder(horizontal_slice, 0, 0, 1, 1,
                                          cv2.BORDER_CONSTANT, 0)

    # Contours of the blank spaces -> fractional y-centroids:
    horizontal_cnts, hierarchy = cv2.findContours(horizontal_slice,
                                                  cv2.RETR_EXTERNAL,
                                                  cv2.CHAIN_APPROX_SIMPLE)
    centroid_horizontal = list(
        map((lambda c: Hvf_Plot_Array.get_contour_centroid(c)[1] / plot_h),
            horizontal_cnts))

    # [Vertical]
    # BUG FIX: the structuring element for extracting vertical lines must
    # span the image HEIGHT (shape[0]); the original used shape[1] (width),
    # which only matched the horizontal pass by accident on square images.
    vertical_size = vertical_img.shape[0]
    verticalStructure = cv2.getStructuringElement(cv2.MORPH_RECT,
                                                  (1, vertical_size))
    vertical_img = cv2.morphologyEx(vertical_img, cv2.MORPH_OPEN,
                                    verticalStructure, iterations=2)

    # Thin slice from the horizontal middle, padded top/bottom:
    vertical_slice = Image_Utils.slice_image(vertical_img, 0.475, 0.05, 0, 1)
    vertical_slice = cv2.copyMakeBorder(vertical_slice, 1, 1, 0, 0,
                                        cv2.BORDER_CONSTANT, 0)

    # Contours of the blank spaces -> fractional x-centroids:
    vertical_cnts, hierarchy = cv2.findContours(vertical_slice,
                                                cv2.RETR_EXTERNAL,
                                                cv2.CHAIN_APPROX_SIMPLE)
    centroid_vertical = list(
        map((lambda c: Hvf_Plot_Array.get_contour_centroid(c)[0] / plot_w),
            vertical_cnts))

    # Fit preliminary, evenly-spaced grid lines centered on the plot; if a
    # prelim line lands on plot content, shift it to the nearest blank-space
    # centroid.

    # Columns:
    col_list = []
    slice_w = np.size(vertical_slice, 1)
    slice_h = np.size(vertical_slice, 0)

    for c in range(Hvf_Plot_Array.NUM_OF_PLOT_COLS + 1):

        # Preliminary column position (fraction of width):
        col_val = 0.5 - (0.097 * (5 - c))

        # Pixel coordinates to probe, clamped inside the slice:
        y = int(slice_h * 0.5)
        x = int(col_val * slice_w)
        if (x >= slice_w):
            x = slice_w - 1

        if (vertical_slice[y, x] == 255):
            # Grid line falls into blank area - we can record value
            Logger.get_logger().log_msg(
                Logger.DEBUG_FLAG_INFO,
                "Prelim column {} grid line works".format(c))
            col_list.append(col_val)
        else:
            # It coincides with content -> snap to closest blank centroid:
            Logger.get_logger().log_msg(
                Logger.DEBUG_FLAG_INFO,
                "Shifting column grid line {} to nearest centroid".format(c))
            closest_centroid = min(centroid_vertical,
                                   key=(lambda v: abs(v - col_val)))
            col_list.append(closest_centroid)

    # Rows:
    row_list = []
    slice_w = np.size(horizontal_slice, 1)
    slice_h = np.size(horizontal_slice, 0)

    for r in range(Hvf_Plot_Array.NUM_OF_PLOT_ROWS + 1):

        # Preliminary row position (fraction of height):
        row_val = 0.5 - (0.095 * (5 - r))

        y = int(row_val * slice_h)
        x = int(slice_w * 0.5)
        if (y >= slice_h):
            y = slice_h - 1

        if (horizontal_slice[y, x] == 255):
            # Grid line falls into blank area - we can record value
            Logger.get_logger().log_msg(
                Logger.DEBUG_FLAG_INFO,
                "Prelim row {} grid line works".format(r))
            row_list.append(row_val)
        else:
            # It coincides with content -> snap to closest blank centroid:
            Logger.get_logger().log_msg(
                Logger.DEBUG_FLAG_INFO,
                "Shifting row grid line {} to nearest centroid".format(r))
            closest_centroid = min(centroid_horizontal,
                                   key=(lambda v: abs(v - row_val)))
            row_list.append(closest_centroid)

    # Collect our two lists and return them together:
    return_dict = {}
    return_dict['row_list'] = row_list
    return_dict['col_list'] = col_list

    return return_dict
def test_unit_tests(sub_dir, test_type):
    """Run the unit-test suite under `sub_dir` for the given `test_type`.

    For every non-hidden file in the suite's test directory, builds a test
    Hvf_Object and a reference Hvf_Object according to `test_type`, compares
    them via Hvf_Test.test_hvf_obj, prints aggregate metrics, and writes
    three TSV error reports (metadata / value plot / perc plot) named after
    `sub_dir`.

    Args:
        sub_dir: name of the unit-test subdirectory to run
        test_type: one of the Hvf_Test.UNIT_TEST_* comparison modes

    Returns:
        "" always; failures are logged rather than raised.
    """
    # Set up the logger module:
    debug_level = Logger.DEBUG_FLAG_ERROR
    Logger.get_logger().set_logger_level(debug_level)

    # Resolve suite paths and verify that all of them exist:
    test_dir_path = os.path.join(Hvf_Test.UNIT_TEST_MASTER_PATH, test_type,
                                 sub_dir)
    test_data_path = os.path.join(test_dir_path, Hvf_Test.UNIT_TEST_TEST_DIR)
    reference_data_path = os.path.join(test_dir_path,
                                       Hvf_Test.UNIT_TEST_REFERENCE_DIR)

    for path in (test_dir_path, test_data_path, reference_data_path):
        if not os.path.isdir(path):
            Logger.get_logger().log_msg(
                Logger.DEBUG_FLAG_ERROR,
                "Unit test directory \'{}\' does not exist".format(path))
            return ""

    Logger.get_logger().log_msg(
        debug_level,
        "================================================================================"
    )
    Logger.get_logger().log_msg(debug_level, "Starting HVF Unit Testing")
    Logger.get_logger().log_msg(debug_level,
                                "Test Type: {}".format(test_type))
    Logger.get_logger().log_msg(debug_level,
                                "Unit Test Name: {}".format(sub_dir))

    # Raw per-test results keyed by file root; metrics computed at the end:
    aggregate_testing_data_dict = {}

    for hvf_file in os.listdir(test_data_path):

        # Skip hidden files:
        if hvf_file.startswith('.'):
            continue

        # Reference files share the test file's root name:
        filename_root, ext = os.path.splitext(hvf_file)

        reference_hvf_obj = None
        test_hvf_obj = None

        # How to build the test/reference objects depends on the test type:
        if test_type in (Hvf_Test.UNIT_TEST_IMAGE_VS_SERIALIZATION,
                         Hvf_Test.UNIT_TEST_IMAGE_VS_DICOM):

            # Test object comes from an image; conversion is timed:
            hvf_image_path = os.path.join(test_data_path, hvf_file)
            hvf_image = File_Utils.read_image_from_file(hvf_image_path)
            Logger.get_logger().log_time("Test " + filename_root,
                                         Logger.TIME_START)
            test_hvf_obj = Hvf_Object.get_hvf_object_from_image(hvf_image)
            time_elapsed = Logger.get_logger().log_time(
                "Test " + filename_root, Logger.TIME_END)

            if (test_type == Hvf_Test.UNIT_TEST_IMAGE_VS_SERIALIZATION):
                # Reference object comes from serialized text:
                serialization_path = os.path.join(reference_data_path,
                                                  filename_root + ".txt")
                serialization = File_Utils.read_text_from_file(
                    serialization_path)
                reference_hvf_obj = Hvf_Object.get_hvf_object_from_text(
                    serialization)
            else:
                # Reference object comes from a DICOM dataset:
                dicom_file_path = os.path.join(reference_data_path,
                                               filename_root + ".dcm")
                dicom_ds = File_Utils.read_dicom_from_file(dicom_file_path)
                reference_hvf_obj = Hvf_Object.get_hvf_object_from_dicom(
                    dicom_ds)

        elif test_type in (Hvf_Test.UNIT_TEST_SERIALIZATION_VS_DICOM,
                           Hvf_Test.UNIT_TEST_SERIALIZATION_VS_SERIALIZATION):

            # Test object comes from serialized text; no timing applies:
            serialization_file_path = os.path.join(test_data_path, hvf_file)
            serialization = File_Utils.read_text_from_file(
                serialization_file_path)
            test_hvf_obj = Hvf_Object.get_hvf_object_from_text(serialization)
            time_elapsed = 0

            if (test_type == Hvf_Test.UNIT_TEST_SERIALIZATION_VS_DICOM):
                # Reference object comes from a DICOM dataset:
                dicom_file_path = os.path.join(reference_data_path,
                                               filename_root + ".dcm")
                dicom_ds = File_Utils.read_dicom_from_file(dicom_file_path)
                reference_hvf_obj = Hvf_Object.get_hvf_object_from_dicom(
                    dicom_ds)
            else:
                # Reference object comes from serialized text:
                ref_serialization_path = os.path.join(reference_data_path,
                                                      filename_root + ".txt")
                ref_serialization = File_Utils.read_text_from_file(
                    ref_serialization_path)
                reference_hvf_obj = Hvf_Object.get_hvf_object_from_text(
                    ref_serialization)

        else:
            Logger.get_logger().log_msg(
                Logger.DEBUG_FLAG_ERROR,
                "Unrecognized test type \'{}\'".format(test_type))
            return ""

        testing_data_dict, testing_msgs = Hvf_Test.test_hvf_obj(
            filename_root, reference_hvf_obj, test_hvf_obj, time_elapsed)
        testing_data_dict["time"] = time_elapsed

        aggregate_testing_data_dict[filename_root] = testing_data_dict

    Hvf_Test.print_unit_test_aggregate_metrics(
        aggregate_testing_data_dict.values())

    # TSV error table: header row plus one row per recorded error dict.
    def build_error_table(header_list, error_key):
        rows = ["\t".join(header_list)]
        for test_data in aggregate_testing_data_dict.values():
            for error in test_data[error_key]:
                rows.append("\t".join(error.values()))
        return "\n".join(rows) + "\n"

    metadata_error_output = build_error_table(
        ["test_name", "field_name", "expected", "actual"], "metadata_errors")

    plot_header_list = ["test_name", "location", "expected", "actual"]
    value_plot_error_output = build_error_table(plot_header_list,
                                                "value_plot_errors")
    perc_plot_error_output = build_error_table(plot_header_list,
                                               "perc_plot_errors")

    # Metadata values may contain non-ASCII characters; scrub this table only
    # (the plot tables were never scrubbed -- presumably always ASCII; TODO
    # confirm):
    metadata_error_output = metadata_error_output.encode(
        'ascii', 'ignore').decode('unicode_escape')

    File_Utils.write_string_to_file(metadata_error_output,
                                    sub_dir + "_metadata_errors.tsv")
    File_Utils.write_string_to_file(value_plot_error_output,
                                    sub_dir + "_value_plot_errors.tsv")
    File_Utils.write_string_to_file(perc_plot_error_output,
                                    sub_dir + "_perc_plot_errors.tsv")

    return ""
def get_perc_plot_element(plot_element):
    """Classify a single percentile-plot cell image into a perc icon enum.

    Pipeline: remove stray specks, crop the white border, then classify by
    size heuristics (empty cell / tiny 'normal' dot) or by template matching
    against the known percentile icons. A 5-percentile match is re-checked
    against half-percentile via contour-area coverage, since those two icons
    are easily confused.

    Args:
        plot_element: image of one plot cell (white background assumed --
            inferred from crop_white_border / bitwise_not usage; TODO confirm)

    Returns:
        one of the Hvf_Perc_Icon.PERC_* enum values.
    """
    # Declare our return value:
    ret_val = Hvf_Perc_Icon.PERC_NO_VALUE

    # Delete stray marks; filters out specks based on size compared to global
    # element and relative to largest contour
    plot_threshold = 0.005
    relative_threshold = 0.005
    plot_element = Image_Utils.delete_stray_marks(plot_element,
                                                  plot_threshold,
                                                  relative_threshold)

    # First, crop the white border out of the element to get just the core
    # icon:
    x0, x1, y0, y1 = Image_Utils.crop_white_border(plot_element)

    # Calculate height and width:
    h = y1 - y0
    w = x1 - x0

    # BUG FIX: element_cropped used to be assigned only on the template-
    # matching path, so the debug lambda at the end of this function raised
    # NameError for empty/'normal' cells when DEBUG logging was active.
    # Default to the full element instead:
    element_cropped = plot_element

    if (w < 0 or h < 0):
        # Bounding indices caught no content (x0 > x1 / y0 > y1), so this
        # must be an empty element:
        ret_val = Hvf_Perc_Icon.PERC_NO_VALUE

    elif ((h / np.size(plot_element, 0)) < 0.20):
        # 'Normal' icons are tiny compared to the cell, so template matching
        # is unreliable for them; detect them by relative height alone (a
        # cropped box under 20% of the cell height is highly likely normal):
        ret_val = Hvf_Perc_Icon.PERC_NORMAL

    else:
        # Grab our element icon:
        element_cropped = plot_element[y0:1 + y1, x0:1 + x1]

        # Template match against all icons and keep the best fit:
        best_match = None
        best_perc = None

        for ii in range(len(Hvf_Perc_Icon.template_perc_list)):

            # Grab our perc icon (do_template_matching scales the smaller of
            # the two up so no data is discarded):
            perc_icon = Hvf_Perc_Icon.template_perc_list[ii]

            min_val, max_val, min_loc, max_loc = \
                Hvf_Perc_Icon.do_template_matching(plot_element, w, h,
                                                   perc_icon)

            # Lower min_val means a better match:
            if (best_match is None or min_val < best_match):
                best_match = min_val
                best_perc = Hvf_Perc_Icon.enum_perc_list[ii]

            # Debug strings for matching the enum:
            debug_string = "Matching enum " + str(
                Hvf_Perc_Icon.enum_perc_list[ii]) + "; match : " + str(
                    min_val)
            Logger.get_logger().log_msg(Logger.DEBUG_FLAG_DEBUG,
                                        debug_string)

        ret_val = best_perc

        # Ensure declared 5-percentile icons are true, because they are often
        # mixed up with half-percentile icons:
        if (ret_val == Hvf_Perc_Icon.PERC_5_PERCENTILE):

            # A 5-percentile icon consists of multiple small contours; a
            # half-percentile icon fills nearly its whole bounding box.
            plot_element = cv2.bitwise_not(plot_element)

            # RETR_EXTERNAL: no children contours (ie contours within
            # contours)
            contours, hierarchy = cv2.findContours(plot_element,
                                                   cv2.RETR_EXTERNAL,
                                                   cv2.CHAIN_APPROX_SIMPLE)

            # Add up all the contour area:
            total_cnt_area = 0
            for cnt in contours:
                total_cnt_area = total_cnt_area + cv2.contourArea(cnt)

            # In the optimal scenario, 5-percentile takes up ~25% of the
            # area and half-percentile essentially 100%; delineate at 50%.
            # Guard against a degenerate zero-area bounding box:
            AREA_PERCENTAGE_CUTOFF = 0.5
            area_percentage = total_cnt_area / max(w * h, 1)

            Logger.get_logger().log_msg(
                Logger.DEBUG_FLAG_DEBUG,
                "Recheck matching betwen 5-percentile and half-percentile")
            Logger.get_logger().log_msg(
                Logger.DEBUG_FLAG_DEBUG,
                "Total contour area percentage: " + str(area_percentage))

            if (area_percentage > AREA_PERCENTAGE_CUTOFF):
                # Half percentile is a better fit - switch our match
                ret_val = Hvf_Perc_Icon.PERC_HALF_PERCENTILE

                debug_string = "Correction: switching from 5-percentile to half-percentile"
                Logger.get_logger().log_msg(Logger.DEBUG_FLAG_DEBUG,
                                            debug_string)

    # Debug strings for bounding box:
    debug_bound_box_string = "Bounding box: " + str(x0) + "," + str(
        y0) + " ; " + str(x1) + "," + str(y1)
    Logger.get_logger().log_msg(Logger.DEBUG_FLAG_DEBUG,
                                debug_bound_box_string)

    debug_bound_box_dim_string = "Bounding box dimensions: " + str(
        w) + " , " + str(h)
    Logger.get_logger().log_msg(Logger.DEBUG_FLAG_DEBUG,
                                debug_bound_box_dim_string)

    # And debug function for showing the cropped element (Hvf_Perc_Icon.i is
    # a running counter used only to give each debug window a unique name):
    show_cropped_element_func = (lambda: cv2.imshow(
        'cropped ' + str(Hvf_Perc_Icon.i), element_cropped))
    Logger.get_logger().log_function(Logger.DEBUG_FLAG_DEBUG,
                                     show_cropped_element_func)
    Hvf_Perc_Icon.i = Hvf_Perc_Icon.i + 1

    return ret_val
"--dicom", required=False, help="path to input DICOM file to test") ap.add_argument('-t', '--test', nargs=2, required=False) ap.add_argument("-a", "--add_test_case", nargs=4, required=False, help="adds input hvf image to test cases") args = vars(ap.parse_args()) # Set up the logger module: #debug_level = Logger.DEBUG_FLAG_INFO; debug_level = Logger.DEBUG_FLAG_WARNING #debug_level = Logger.DEBUG_FLAG_DEBUG; msg_logger = Logger.get_logger().set_logger_level(debug_level) ############################################################################### # SINGLE IMAGE TESTING ######################################################## ############################################################################### # If we are passed in an image, read it and show results if (args["image"]): hvf_image = File_Utils.read_image_from_file(args["image"]) Hvf_Test.test_single_image(hvf_image) ############################################################################### # DICOM FILE TESTING ########################################################## ############################################################################### if (args["dicom"]):