Beispiel #1
0
def paragraph_tag_cleanup(debug_dir: str, tag_dict: dict, line: str):
    """Write the paragraph tag(s) that the tag registry calls for.

    The registry is read from ``tag_registry.json`` inside ``debug_dir``.
    When its ``"paragraph"`` entry equals ``"0"`` only the opening
    paragraph tag is written; for any other value the closing tag
    followed by the opening tag is written.

    Args:
        debug_dir: Directory that contains ``tag_registry.json``.
        tag_dict: Mapping providing the ``"paragraph-beg"`` and
            ``"paragraph-end"`` tag strings.
        line: Value appended to the tag text in the debug log message.
    """
    registry_path = os.path.join(debug_dir, "tag_registry.json")
    with open(registry_path) as registry_handle:
        tag_registry = json.load(registry_handle)

    if tag_registry["paragraph"] == "0":
        content_update = tag_dict["paragraph-beg"]
    else:
        content_update = tag_dict["paragraph-end"] + tag_dict["paragraph-beg"]

    # NOTE(review): main_dict is not a parameter of this function, so it
    # has to resolve from the enclosing module scope -- confirm it exists.
    build_output_file.bof_processor(update_output=content_update,
                                    main_dict=main_dict)

    try:
        # The message is only emitted when DEBUG is enabled, yet it is
        # logged through .error(); kept exactly as the original did.
        if logger_debug.isEnabledFor(logging.DEBUG):
            logger_debug.error(str(content_update + f"{line}"))
    except AttributeError:
        logging.exception("Check setLevel for logger_debug.")
def ti_processor(main_dict: dict, cw_text: str) -> None:
    """Write the "missing" placeholder tag for a control word.

    The tag template is the first entry of the ``"missing"`` list in the
    tag set selected by ``main_dict["tag_set"]`` (loaded from
    ``xml_tags.json`` in ``main_dict["dicts_dir"]``). In that template
    ``"zzz"`` is replaced by ``main_dict["line_to_parse"]`` and ``"aaa"``
    by ``cw_text``; the result is handed to
    ``build_output_file.bof_processor``.

    Args:
        main_dict: Processing state; must provide ``"dicts_dir"``,
            ``"tag_set"`` and ``"line_to_parse"``.
        cw_text: Text of the control word to embed in the tag.
    """
    tag_dict_file = os.path.join(main_dict["dicts_dir"], "xml_tags.json")
    # The file is only read, so open it read-only; "r+" would fail on a
    # file without write permission.
    with open(tag_dict_file, "r") as tag_dict_file_pre:
        tag_dict_options = json.load(tag_dict_file_pre)

    tag_dict = tag_dict_options[str(main_dict["tag_set"])]
    tag_template = tag_dict["missing"][0]
    tag = tag_template.replace("zzz", str(main_dict["line_to_parse"]))
    tag = tag.replace("aaa", cw_text)
    build_output_file.bof_processor(update_output=tag, main_dict=main_dict)
Beispiel #3
0
def tc_processor(main_dict: dict) -> dict:
    """Request a "close" for each structural tag and write the results.

    For each of ``par``, ``section``, ``body``, ``bodytext`` and
    ``wrapper`` (in that order) a ``tag_info`` dict with
    ``"tag_setting": "close"`` is passed to ``tag_check.tc_processor``;
    the output string it returns is then passed to
    ``build_output_file.bof_processor``.

    Args:
        main_dict: Processing state; must provide ``"dicts_dir"`` and
            ``"tag_set"``.

    Returns:
        The ``main_dict`` returned by the last ``tag_check.tc_processor``
        call.
    """
    xml_tags_file = os.path.join(main_dict["dicts_dir"], "xml_tags.json")
    # The file is only read, so open it read-only; "r+" would fail on a
    # file without write permission.
    with open(xml_tags_file, "r") as xml_tags_pre:
        xml_tags_dicts = json.load(xml_tags_pre)
    xml_tags = xml_tags_dicts[str(main_dict["tag_set"])]

    status_list = ("par", "section", "body", "bodytext", "wrapper")
    for tag in status_list:
        tag_info = {
            "name":          tag,
            "tag_open_str":  xml_tags[tag][0],
            "tag_close_str": xml_tags[tag][1],
            "tag_setting":   "close",
            "tag_set":       main_dict["tag_set"],
        }
        main_dict, update_output = tag_check.tc_processor(
            tag_info=tag_info, main_dict=main_dict)
        build_output_file.bof_processor(
            update_output=update_output, main_dict=main_dict)
    return main_dict
Beispiel #4
0
def gc_processor(main_dict: dict, collections_dict: dict) -> dict:
    """Route every element of ``main_dict["contents_list"]`` to its handler.

    * ``"{"`` and ``"}"`` are skipped.
    * Elements matching ``main_dict["cw_regex"]`` are treated as control
      words: the alphabetic characters form the word, the digits form its
      value, and the handler name is looked up in ``collections_dict``.
      A handler other than ``"null"`` is dispatched through
      ``control_word_to_build.cwtb_processor``.
    * Every other element is written unchanged to the output.

    Args:
        main_dict: Processing state; must provide ``"contents_list"``,
            ``"cw_regex"`` and ``"tag_set"``.
        collections_dict: Maps control-word text to a handler name.

    Returns:
        The (possibly replaced) ``main_dict``.
    """
    # Temp setup for testing
    null_function = "null"
    for ele in main_dict["contents_list"]:
        if ele in ("{", "}"):
            continue

        if not re.search(main_dict["cw_regex"], ele):
            # Not a control word: emit the element as-is.
            build_output_file.bof_processor(update_output=ele,
                                            main_dict=main_dict)
            continue

        cw_text = "".join(ch for ch in ele if ch.isalpha())
        cw_value = "".join(ch for ch in ele if ch.isdigit())

        # NOTE(review): this try also covers the dispatch call, so a
        # KeyError raised inside it is handled as a missing control word.
        try:
            cw_func = collections_dict[cw_text]
            if cw_func != null_function:
                main_dict = control_word_to_build.cwtb_processor(
                    tag_info={
                        "func": cw_func,
                        "cw_text": cw_text,
                        "cw_value": cw_value,
                        "name": cw_text,
                        "tag_open": "",
                        "tag_close": "",
                        "tag_set": main_dict["tag_set"],
                    },
                    main_dict=main_dict)
        except KeyError:
            # Add missing control word to control_words_collections.csv
            # file.
            collections_dict = csv_modifier.csvm_processor(
                main_dict=main_dict,
                cw_text=cw_text,
                collections_dict=collections_dict)
            # Add control word that cannot be processed to XML build
            # file.
            tag_insert_missing_cw.ti_processor(main_dict=main_dict,
                                               cw_text=cw_text)
    return main_dict
Beispiel #5
0
def bt_processor(main_dict: dict) -> dict:
    """Handle a backslash that is part of the text.

    When ``main_dict["parse_text"]`` starts with one whitespace character
    followed by two backslashes, a single backslash is written to the
    output, the first backslash is removed from ``parse_text``,
    ``parse_index`` is set to 1 and the state is passed through
    ``adjust_process_text.apt_processor``.

    A ``TypeError`` raised anywhere in that sequence is logged together
    with the current line, index and text, and is not re-raised.

    Args:
        main_dict: Processing state; must provide ``"parse_text"``.

    Returns:
        The (possibly replaced) ``main_dict``.
    """
    backslash = "\\"
    try:
        if re.search(r"^(\s\\\\)", main_dict["parse_text"]) is not None:
            build_output_file.bof_processor(
                update_output=backslash, main_dict=main_dict)
            remaining = main_dict["parse_text"].replace(backslash, "", 1)
            main_dict["parse_text"] = remaining
            main_dict["parse_index"] = 1
            main_dict = adjust_process_text.apt_processor(main_dict=main_dict)
    except TypeError:
        logging.exception(f"{main_dict['line_to_parse']}:"
                          f"{main_dict['parse_index']}--"
                          f"{main_dict['parse_text']}")
    return main_dict
Beispiel #6
0
def cwtb_processor(tag_info: dict, main_dict: dict) -> dict:
    """Run a control word's handler module and write the resulting tag.

    The module named by ``tag_info["func"]`` is imported and its
    ``cw_func_processor`` is called to fill in ``tag_info``. The result
    is passed to ``tag_check.tc_processor``; any non-empty output string
    it returns is written with ``build_output_file.bof_processor``.

    Args:
        tag_info: Control-word description; must provide ``"func"`` and
            ``"name"``.
        main_dict: Processing state; must provide ``"base_dir"``.

    Returns:
        The ``main_dict`` returned by ``tag_check.tc_processor``, or the
        unchanged input ``main_dict`` when the handler module name is
        rejected with ``ValueError``.

    Raises:
        SystemExit: If ``tag_check.tc_processor`` returns ``None`` as the
            main dict.
    """
    base_dir = main_dict["base_dir"]
    cws_dir = os.path.join(base_dir, "Library/control_words_symbols/")
    try:
        tagging_mod = importlib.import_module(tag_info["func"],
                                              package=cws_dir)
    except ValueError:
        log.debug(msg=f"Module name: {tag_info['name']}")
        # Without a handler module there is nothing to run. Previously
        # execution continued and failed with UnboundLocalError on the
        # unbound ``tagging_mod``.
        return main_dict

    tag_info = tagging_mod.cw_func_processor(tag_info=tag_info,
                                             main_dict=main_dict)
    # Check whether tag is already open or closed.
    main_dict, update_output = tag_check.tc_processor(
        tag_info=tag_info, main_dict=main_dict)

    # Validate before handing main_dict to the writer; the check used to
    # run only after main_dict had already been used.
    if main_dict is None:
        log.debug("Main_dict is none.")
        sys.exit(1)

    if update_output != "":
        build_output_file.bof_processor(update_output=update_output,
                                        main_dict=main_dict)
    return main_dict
Beispiel #7
0
def ct_processor(main_dict: dict) -> dict:
    """Consume a leading run of plain text from ``parse_text``.

    The longest prefix of ``main_dict["parse_text"]`` made only of the
    characters allowed by the pattern below is written to the output and
    stripped from the front of ``parse_text``. ``parse_index`` is then
    set to 1 and the state is passed through
    ``adjust_process_text.apt_processor``. Nothing happens when that
    prefix is empty.

    A ``TypeError`` raised anywhere in that sequence is logged and not
    re-raised.

    Args:
        main_dict: Processing state; must provide ``"parse_text"``.

    Returns:
        The (possibly replaced) ``main_dict``.
    """
    # Test for text.
    try:
        test = re.search(r"^([a-zA-Z0-9\s?.!,;:_%<>=@\-\[\]–/()\'\"“”‘’]*)",
                         main_dict["parse_text"])
        if test is not None and test[0] != "":
            text = test[0]
            build_output_file.bof_processor(update_output=text,
                                            main_dict=main_dict)
            # The match is anchored at the start, so drop exactly that
            # prefix. str.replace(text, "") would also delete every later
            # occurrence of the same text and corrupt the remainder.
            main_dict["parse_text"] = main_dict["parse_text"][len(text):]
            main_dict["parse_index"] = 1
            main_dict = adjust_process_text.apt_processor(main_dict=main_dict)
    except TypeError:
        # NOTE(review): these keys are read from
        # main_dict["processing_dict"], while the code above reads
        # "parse_text" from main_dict directly -- confirm which is right.
        logging.exception(f"Check_text: "
                          f"{main_dict['processing_dict']['line_to_parse']}:"
                          f"{main_dict['processing_dict']['parse_index']}--"
                          f"{main_dict['processing_dict']['parse_text']}")
    return main_dict
Beispiel #8
0
def ofh_processor(main_dict: dict, config_settings_dict: dict) -> dict:
    """Write the configured XML header file's text to the output.

    ``defaultheader.xml`` is used when
    ``config_settings_dict["output-file-header"]`` equals 0, otherwise
    ``tpresheader.xml``; both are looked up in the ``input`` directory
    below ``main_dict["base_dir"]``. The file's text is passed to
    ``build_output_file.bof_processor``.

    A ``FileNotFoundError`` is logged and not re-raised; in that case the
    input ``main_dict`` is returned unchanged.

    Args:
        main_dict: Processing state; must provide ``"base_dir"``.
        config_settings_dict: Must provide ``"output-file-header"``.

    Returns:
        The ``main_dict`` returned by ``build_output_file.bof_processor``,
        or the unchanged input on ``FileNotFoundError``.
    """
    header_file_dir = os.path.join(main_dict["base_dir"], "input")
    try:
        if config_settings_dict["output-file-header"] == 0:
            header_file_name = "defaultheader.xml"
        else:
            header_file_name = "tpresheader.xml"
        header_file = os.path.join(header_file_dir, header_file_name)
        # The header is only read, so open it read-only.
        with open(header_file, "r") as header_file_pre:
            header_file_text = header_file_pre.read()
        main_dict = build_output_file.bof_processor(
            main_dict=main_dict, update_output=header_file_text)
    except FileNotFoundError as error:
        # The first argument must be the format string. Passing the
        # exception first made the text a %-argument, which broke
        # formatting of the log record.
        logging.exception(
            "An XML header file cannot be found or opened: %s", error)
    return main_dict
def oft_processor(main_dict: dict, config_settings_dict: dict) -> Any:
    """Insert the XML tags to start the document portion of the XML file
    (after the header).

    The tags are read from ``start_tags.json`` in
    ``main_dict["dicts_dir"]`` and selected by
    ``config_settings_dict["tag-set"]``, which must be ``"1"``, ``"2"``
    or ``"3"``. On a ``KeyError`` during that selection the entry for
    ``"1"`` is used instead. If the file is missing, the error is logged
    and an empty string is written.

    Args:
        main_dict: Processing state; must provide ``"dicts_dir"``.
        config_settings_dict: Must provide ``"tag-set"``.

    Returns:
        Whatever ``build_output_file.bof_processor`` returns.
    """
    start_tags = os.path.join(main_dict["dicts_dir"], "start_tags.json")
    transition_tags = ""
    try:
        # The open now sits inside the try so the FileNotFoundError
        # handler is reachable; it used to be dead code because the file
        # was opened before the try started.
        with open(start_tags, "r") as start_tags_pre:
            start_tag_dict = json.load(start_tags_pre)
    except FileNotFoundError:
        logging.exception("The start tags file %s is missing.", start_tags)
    else:
        try:
            test_dict = {"1": start_tag_dict["1"],
                         "2": start_tag_dict["2"],
                         "3": start_tag_dict["3"]}
            transition_tags = test_dict[config_settings_dict["tag-set"]]
        except KeyError:
            # Message first: passing the exception as the first argument
            # broke %-formatting of the log record.
            logging.exception("The tag-set number does not match an "
                              "entry for transition tags.")
            transition_tags = start_tag_dict["1"]

    main_dict = build_output_file.bof_processor(
        main_dict=main_dict, update_output=transition_tags)

    return main_dict