Exemple #1
0
    def to_string(self, left_margin=0, indent=4):
        """Render the error as human-readable text.

        The result consists of a localized header line (the error code is
        passed as the "$1" replacement), a blank line, a localized title for
        the description section, another blank line and the description
        itself, which is indented deeper than its title.

        :type left_margin: int
        :type indent: int
        :param left_margin: Number of spaces put before the header and title.
        :param indent: Number of extra spaces put before the description.
        :rtype : str
        :return: The formatted string.
        """

        #  Language used to look up the localized messages.
        lang_id = _l10n_opt.OptionWrapper(self.__opt).get_language_id()

        margin = " " * left_margin

        #  Header line.
        header = _l10n_reg.get_message(
            lang_id,
            "logic.common.error.header",
            replace_map={"$1": self.get_error_code()}
        )

        #  Title of the description section.
        title = _l10n_reg.get_message(
            lang_id,
            "logic.common.error.description"
        )

        #  The description body sits one indent level below its title.
        body = " " * (left_margin + indent) + self.get_description()

        return margin + header + "\n\n" + margin + title + "\n\n" + body
Exemple #2
0
def _macro_simplify(expression, mu_obj, node, options):
    """Simplify a merge utility and reject eliminated elements.

    Calls mu_obj.simplify() and walks the symbols it returns. Every symbol
    other than "e" is recorded as a traceback item (spanning the source range
    of the work node) on a MOLECULE_ELEMENT_ELIMINATED error; that error is
    raised when at least one such symbol was found.

    :type expression: str
    :type mu_obj: MergeUtil
    :type node: bce.parser.ast.molecule._ASTNodeBaseML
    :type options: bce.option.Option
    :param expression: The origin expression.
    :param mu_obj: The MergeUtil object to simplify.
    :param node: The work node.
    :param options: The options.
    :raise _cm_error.Error: Raise when a symbol other than "e" was removed.
    """

    #  Language used for the error messages.
    lang_id = _l10n_opt.OptionWrapper(options).get_language_id()

    #  Simplify and collect the removed symbols.
    eliminated = mu_obj.simplify()

    #  The error object is prepared up-front; it is raised only if a
    #  traceback item gets attached to it below.
    pending_error = _cm_error.Error(
        _ml_error.MOLECULE_ELEMENT_ELIMINATED,
        _l10n_reg.get_message(
            lang_id,
            "parser.molecule.error.element_eliminated.description"
        ),
        options
    )
    must_raise = False

    for symbol in eliminated:
        #  The "e" symbol is allowed to vanish.
        if symbol == "e":
            continue

        must_raise = True
        pending_error.push_traceback(
            expression,
            node.get_starting_position_in_source_text(),
            node.get_ending_position_in_source_text(),
            _l10n_reg.get_message(
                lang_id,
                "parser.molecule.error.element_eliminated.message",
                replace_map={"$1": symbol}
            )
        )

    if must_raise:
        raise pending_error
Exemple #3
0
    def to_string(self, left_margin=0, indent=4):
        """Render the parser error as human-readable text.

        The output is a sequence of sections separated by one blank line:
        the localized header (the error code is passed as the "$1"
        replacement), the localized description title, the indented
        description and - only when traceback items exist - the localized
        traceback title followed by every traceback item, last item first.

        :type left_margin: int
        :type indent: int
        :param left_margin: The left margin value.
        :param indent: The indent spaces count.
        :rtype : str
        :return: The formatted string.
        """

        #  Language used to look up the localized messages.
        lang_id = _l10n_opt.OptionWrapper(self.__opt).get_language_id()

        margin = " " * left_margin
        inner_margin = left_margin + indent

        #  Header, description title and description body.
        sections = [
            margin + _l10n_reg.get_message(
                lang_id,
                "parser.common.error.header",
                replace_map={"$1": self.get_error_code()}
            ),
            margin + _l10n_reg.get_message(
                lang_id,
                "parser.common.error.description"
            ),
            " " * inner_margin + self.__description,
        ]

        #  Traceback section (present only if there are traceback items).
        item_count = len(self.__traceback)
        if item_count != 0:
            sections.append(margin + _l10n_reg.get_message(
                lang_id,
                "parser.common.error.traceback"
            ))

            #  Items are written from the last one to the first one.
            for index in range(item_count - 1, -1, -1):
                sections.append(
                    self.__traceback[index].to_string(inner_margin, "^")
                )

        return "\n\n".join(sections)
Exemple #4
0
def _macro_register_form(expression, origin_form, new_form, options):
    """Register the form of a chemical expression, rejecting mixed forms.

    The new form is accepted when no form has been registered yet
    (origin_form is None) or when it equals the one already registered.
    Otherwise a CEXP_MIXED_FORM error is raised whose traceback item covers
    the whole expression.

    :type expression: str
    :type origin_form: int | None
    :type new_form: int
    :type options: bce.option.Option
    :param expression: The chemical expression.
    :param origin_form: The origin form (None if nothing was registered yet).
    :param new_form: The new form.
    :param options: The options (only consulted when an error is built).
    :rtype : int
    :return: The new form if no conflict exists.
    :raise _cm_error.Error: Raise when the new form conflicts with the
                            origin form.
    """

    #  Accept path: nothing to localize, so the options are not touched.
    if origin_form is None or origin_form == new_form:
        return new_form

    #  The language ID is needed for the error messages only, so it is
    #  resolved here rather than on every call.
    lang_id = _l10n_opt.OptionWrapper(options).get_language_id()

    err = _cm_error.Error(
        _cexp_error.CEXP_MIXED_FORM,
        _l10n_reg.get_message(
            lang_id,
            "parser.cexp.error.mixed_form.description"
        ),
        options
    )

    #  Mark the whole expression (first to last character) in the traceback.
    err.push_traceback(
        expression,
        0,
        len(expression) - 1,
        _l10n_reg.get_message(
            lang_id,
            "parser.cexp.error.mixed_form.message"
        )
    )
    raise err
Exemple #5
0
def _check_right_operand(expression, token_list, token_id, options):
    """Ensure that the token at token_id is followed by a right operand.

    A right operand is considered present when a next token exists and it is
    a left parenthesis, an operand or a function. Otherwise a
    MEXP_MISSING_OPERAND error pointing at the position of the current token
    is raised.

    :type expression: str
    :type token_list: list[bce.parser.mexp.token.Token]
    :type token_id: int
    :type options: bce.option.Option
    :param expression: (The same as the variable in parse_to_rpn() routine.)
    :param token_list: (The same as the variable in parse_to_rpn() routine.)
    :param token_id: (The same as the variable in parse_to_rpn() routine.)
    :param options: (The same as the variable in parse_to_rpn() routine.)
    :raise _cm_error.Error: Raise when there's no right operand.
    """

    follower_id = token_id + 1

    #  The operand is missing if the current token is the last one...
    operand_missing = follower_id == len(token_list)
    if not operand_missing:
        #  ... or if the next token can not start an operand.
        follower = token_list[follower_id]
        operand_missing = (not follower.is_left_parenthesis()
                           and not follower.is_operand()
                           and not follower.is_function())

    if not operand_missing:
        return

    #  Language used for the error messages.
    lang_id = _l10n_opt.OptionWrapper(options).get_language_id()

    #  Build the error; it points at the current token.
    position = token_list[token_id].get_position()
    failure = _cm_error.Error(
        _mexp_errors.MEXP_MISSING_OPERAND,
        _l10n_reg.get_message(
            lang_id,
            "parser.mexp.error.missing_operand.description"
        ),
        options
    )
    failure.push_traceback(
        expression,
        position,
        position,
        _l10n_reg.get_message(
            lang_id,
            "parser.mexp.error.missing_operand.right"
        )
    )
    raise failure
Exemple #6
0
def parse_ast(expression, root_node, options, mexp_protected_header_enabled=False, mexp_protected_header_prefix="X"):
    """Evaluate a molecule AST into a dictionary of its atoms.

    The nodes are visited in the order produced by the BFS helper (from the
    leaves to the root). The result of every processed node is stored under
    id(node), so the results of the children are available when their parent
    is processed:

      - hydrate group / molecule: merges the results of all children scaled
        by the prefix number (a molecule also contributes its electronic
        count under the symbol "e");
      - atom: the atom symbol with its suffix number;
      - parenthesis: the result of the inner node scaled by the suffix number;
      - abbreviation: the mapped expression is parsed with the configured
        molecule parser and its atoms are scaled by the suffix number.

    :type expression: str
    :type root_node: bce.parser.ast.molecule.ASTNodeHydrateGroup | bce.parser.ast.molecule.ASTNodeMolecule
    :type options: bce.option.Option
    :type mexp_protected_header_enabled: bool
    :type mexp_protected_header_prefix: str
    :param expression: The origin expression.
    :param root_node: The root node of the AST.
    :param options: The options.
    :param mexp_protected_header_enabled: Whether the MEXP protected headers are enabled.
    :param mexp_protected_header_prefix: The prefix of the MEXP protected headers.
    :rtype : dict
    :return: The parsed atoms dictionary.
    :raise _cm_error.Error: Raise when a node has no content, an abbreviation
                            is unsupported or fails to parse, or an element
                            is eliminated while simplifying.
    """

    #  Wrap the interface option, the molecule option and get the language.
    if_opt = _interface_opt.OptionWrapper(options)
    molecule_opt = _ml_opt.OptionWrapper(options)
    lang_id = _l10n_opt.OptionWrapper(options).get_language_id()

    def _text(message_key):
        """Look up a localized message in the current language."""
        return _l10n_reg.get_message(lang_id, message_key)

    def _no_content_error():
        """Create a "no content" error without any traceback item."""
        return _cm_error.Error(
            _ml_error.MOLECULE_NO_CONTENT,
            _text("parser.molecule.error.no_content.description"),
            options
        )

    #  Keyword arguments forwarded unchanged when expanding abbreviations.
    header_kwargs = {
        "mexp_protected_header_enabled": mexp_protected_header_enabled,
        "mexp_protected_header_prefix": mexp_protected_header_prefix,
    }

    #  Results of the nodes processed so far (id(node) -> MergeUtil).
    results = {}

    #  Iterate nodes from the leaves to the root.
    for node in _ml_ast_bfs.do_bfs(root_node, True):
        if node.is_hydrate_group() or node.is_molecule():
            assert isinstance(
                node,
                (_ast_base.ASTNodeHydrateGroup, _ast_base.ASTNodeMolecule)
            )

            #  The prefix number scales everything inside this node.
            factor = node.get_prefix_number()
            merged = MergeUtil()

            #  Only molecules carry an electronic count.
            if node.is_molecule():
                charge = node.get_electronic_count().simplify()
                if not charge.is_zero:
                    merged.add("e", charge * factor)

            for index in range(len(node)):
                child = node[index]
                child_result = results[id(child)]

                #  A member of a hydrate group must not be empty.
                if node.is_hydrate_group() and len(child_result) == 0:
                    assert isinstance(child, _ast_base.ASTNodeMolecule)

                    failure = _no_content_error()

                    #  The marked range and the message depend on whether the
                    #  empty member is the first, the last or an inner one.
                    if index == 0:
                        mark = child.get_ending_position_in_source_text() + 1
                        begin, end = mark, mark
                        key = "parser.molecule.error.no_content.before"
                    elif index == len(node) - 1:
                        mark = child.get_starting_position_in_source_text() - 1
                        begin, end = mark, mark
                        key = "parser.molecule.error.no_content.after"
                    else:
                        begin = child.get_starting_position_in_source_text() - 1
                        end = child.get_ending_position_in_source_text() + 1
                        key = "parser.molecule.error.no_content.inside"

                    failure.push_traceback(expression, begin, end, _text(key))
                    raise failure

                merged.merge(child_result, factor)

            _macro_simplify(expression, merged, node, options)
        elif node.is_atom():
            assert isinstance(node, _ast_base.ASTNodeAtom)

            #  A single atom counted by its suffix number.
            factor = node.get_suffix_number()
            merged = MergeUtil()
            merged.add(node.get_atom_symbol(), factor)
        elif node.is_parenthesis():
            assert isinstance(node, _ast_base.ASTNodeParenthesisWrapper)

            factor = node.get_suffix_number()
            merged = MergeUtil()

            #  The inner node has been processed before its wrapper.
            inner_result = results[id(node.get_inner_node())]

            #  Empty parentheses are rejected.
            if len(inner_result) == 0:
                failure = _no_content_error()
                failure.push_traceback(
                    expression,
                    node.get_starting_position_in_source_text(),
                    node.get_ending_position_in_source_text(),
                    _text("parser.molecule.error.no_content.inside")
                )
                raise failure

            merged.merge(inner_result, factor)
            _macro_simplify(expression, merged, node, options)
        elif node.is_abbreviation():
            assert isinstance(node, _ast_base.ASTNodeAbbreviation)

            symbol = node.get_abbreviation_symbol()

            #  An abbreviation without a symbol has no content.
            if len(symbol) == 0:
                failure = _no_content_error()
                failure.push_traceback(
                    expression,
                    node.get_starting_position_in_source_text(),
                    node.get_ending_position_in_source_text(),
                    _text("parser.molecule.error.no_content.inside")
                )
                raise failure

            #  The symbol must be known to the abbreviation mapping.
            mapping = molecule_opt.get_abbreviation_mapping()
            if symbol not in mapping:
                failure = _cm_error.Error(
                    _ml_error.MOLECULE_UNSUPPORTED_ABBREVIATION,
                    _text("parser.molecule.error.unsupported_abbreviation.description"),
                    options
                )
                failure.push_traceback(
                    expression,
                    node.get_starting_position_in_source_text() + 1,
                    node.get_ending_position_in_source_text() - 1,
                    _text("parser.molecule.error.unsupported_abbreviation.message")
                )
                raise failure

            expanded = mapping[symbol]

            #  Parse the mapped expression with the configured molecule
            #  parser. On failure the error is extended with two traceback
            #  items (the expanded expression and the place of the
            #  abbreviation in the origin expression) and re-raised.
            try:
                parser = if_opt.get_molecule_parser()
                expanded_root = parser.parse_expression(
                    expanded,
                    options,
                    **header_kwargs
                )
                expanded_atoms = parser.parse_ast(
                    expanded,
                    expanded_root,
                    options,
                    **header_kwargs
                )
            except _cm_error.Error as err:
                err.push_traceback(
                    expanded,
                    0,
                    len(expanded) - 1,
                    _text("parser.molecule.error.parsing_abbreviation.expand")
                )
                err.push_traceback(
                    expression,
                    node.get_starting_position_in_source_text() + 1,
                    node.get_ending_position_in_source_text() - 1,
                    _text("parser.molecule.error.parsing_abbreviation.origin")
                )
                raise err

            merged = MergeUtil()
            factor = node.get_suffix_number()

            #  Add the atoms of the expanded expression, scaled by the suffix.
            for atom_symbol in expanded_atoms:
                merged.add(atom_symbol, expanded_atoms[atom_symbol] * factor)

            _macro_simplify(expression, merged, node, options)
        else:
            raise RuntimeError("Never reach this condition.")

        #  Save the result of this node for its parent.
        results[id(node)] = merged

    #  The root node is processed last; its result is the final one.
    root_result = results[id(root_node)]

    #  The whole expression must not be empty.
    if len(root_result) == 0:
        failure = _no_content_error()
        failure.push_traceback(
            expression,
            0,
            len(expression) - 1,
            _text("parser.molecule.error.no_content.inside")
        )
        raise failure

    return root_result.get_data()
Exemple #7
0
def tokenize(expression,
             options,
             mexp_protected_header_enabled=False,
             mexp_protected_header_prefix="X"):
    """Split a molecule expression into tokens.

    The expression is scanned from left to right. At every position the
    first matching rule (in the order below) produces one token:

      - a run of digits -> integer operand;
      - an upper-case letter followed by lower-case letters -> symbol;
      - "." -> hydrate dot;
      - "(g)", "(l)", "(s)", "(aq)" -> status descriptors;
      - "(" / ")" -> parentheses;
      - "[...]" -> abbreviation;
      - "{...}" -> math expression (evaluated with the MEXP parser);
      - "<" / ">" -> electronic begin / end;
      - "e+" / "e-" -> positive / negative electronic flag.

    An end token is always appended after the last scanned token.

    :type expression: str
    :type options: bce.option.Option
    :type mexp_protected_header_enabled: bool
    :type mexp_protected_header_prefix: str
    :param expression: The expression.
    :param options: The options.
    :param mexp_protected_header_enabled: Whether the MEXP protected headers are enabled.
    :param mexp_protected_header_prefix: The prefix of the MEXP protected headers.
    :rtype : list[Token]
    :return: The token list.
    :raise bce.parser.common.error.Error: Raise when a parser error occurred.
    """

    #  Initialize.
    lang_id = _l10n_opt.OptionWrapper(options).get_language_id()
    if_opt = _interface_opt.OptionWrapper(options)
    tokens = []
    pos = 0
    length = len(expression)

    def _error(code, description_key, begin, end, message_key, **message_args):
        """Create an error with one traceback item covering [begin, end]."""
        failure = _cm_error.Error(
            code,
            _l10n_reg.get_message(lang_id, description_key),
            options
        )
        failure.push_traceback(
            expression,
            begin,
            end,
            _l10n_reg.get_message(lang_id, message_key, **message_args)
        )
        return failure

    while pos < length:
        ch = expression[pos]

        if ch.isdigit():
            #  Integer: extend up to the next non-digit character.
            stop = pos + 1
            while stop < length and expression[stop].isdigit():
                stop += 1

            tokens.append(
                create_integer_operand_token(expression[pos:stop],
                                             len(tokens), pos))
            pos = stop
        elif ch.isupper():
            #  Atom symbol: an upper-case letter plus the lower-case letters
            #  that directly follow it.
            stop = pos + 1
            while stop < length and expression[stop].islower():
                stop += 1

            tokens.append(
                create_symbol_token(expression[pos:stop], len(tokens), pos))
            pos = stop
        elif ch == ".":
            #  Hydrate dot.
            tokens.append(create_hydrate_dot_token(len(tokens), pos))
            pos += 1
        elif expression.startswith("(g)", pos):
            #  Gas status descriptor.
            tokens.append(create_gas_status_token(len(tokens), pos))
            pos += 3
        elif expression.startswith("(l)", pos):
            #  Liquid status descriptor.
            tokens.append(create_liquid_status_token(len(tokens), pos))
            pos += 3
        elif expression.startswith("(s)", pos):
            #  Solid status descriptor.
            tokens.append(create_solid_status_token(len(tokens), pos))
            pos += 3
        elif expression.startswith("(aq)", pos):
            #  Aqueous status descriptor.
            tokens.append(create_aqueous_status_token(len(tokens), pos))
            pos += 4
        elif ch == "(":
            #  Normal left parenthesis (status descriptors were tried first).
            tokens.append(create_left_parenthesis_token(len(tokens), pos))
            pos += 1
        elif ch == ")":
            #  Normal right parenthesis.
            tokens.append(create_right_parenthesis_token(len(tokens), pos))
            pos += 1
        elif ch == "[":
            #  Abbreviation: everything up to (and including) the next ']'.
            closing = expression.find("]", pos + 1)
            if closing == -1:
                raise _error(
                    _ml_error.MOLECULE_PARENTHESIS_MISMATCH,
                    "parser.molecule.error.parenthesis_mismatch.description",
                    pos,
                    pos,
                    "parser.molecule.error.parenthesis_mismatch.right")

            stop = closing + 1
            tokens.append(
                create_abbreviation_token(expression[pos:stop],
                                          len(tokens), pos))
            pos = stop
        elif ch == "{":
            #  Math expression: find the '}' that closes this '{' while
            #  counting the brackets opened in between.
            depth = 0
            stop = -1

            for probe in range(pos + 1, length):
                probe_ch = expression[probe]

                if probe_ch in "([{":
                    depth += 1
                elif probe_ch in ")]}":
                    if depth == 0:
                        #  Nothing is open any more, so this closer ends the
                        #  math expression - and it has to be a '}'.
                        if probe_ch != "}":
                            raise _error(
                                _ml_error.MOLECULE_PARENTHESIS_MISMATCH,
                                "parser.molecule.error.parenthesis_mismatch.description",
                                probe,
                                probe,
                                "parser.molecule.error.parenthesis_mismatch.incorrect",
                                replace_map={"$1": "}"})

                        stop = probe + 1
                        break

                    depth -= 1

            #  The end '}' was not found.
            if stop == -1:
                raise _error(
                    _ml_error.MOLECULE_PARENTHESIS_MISMATCH,
                    "parser.molecule.error.parenthesis_mismatch.description",
                    pos,
                    pos,
                    "parser.molecule.error.parenthesis_mismatch.right")

            #  "{}" has no content.
            if pos + 2 == stop:
                raise _error(
                    _ml_error.MOLECULE_NO_CONTENT,
                    "parser.molecule.error.no_content.description",
                    pos,
                    pos + 1,
                    "parser.molecule.error.no_content.inside")

            #  The math expression, braces included.
            mexp_expr = expression[pos:stop]

            #  Evaluate it; a failure is extended with the place of the math
            #  expression inside the molecule expression and re-raised.
            try:
                mexp_value = if_opt.get_mexp_parser().parse(
                    mexp_expr,
                    options,
                    protected_header_enabled=mexp_protected_header_enabled,
                    protected_header_prefix=mexp_protected_header_prefix)
            except _cm_error.Error as err:
                err.push_traceback(
                    expression,
                    pos,
                    stop - 1,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.parsing_mexp.message"))
                raise err

            tokens.append(
                create_mexp_operand_token(mexp_expr, mexp_value, len(tokens),
                                          pos))
            pos = stop
        elif ch == "<":
            #  Electronic begin parenthesis.
            tokens.append(create_electronic_begin_token(len(tokens), pos))
            pos += 1
        elif ch == ">":
            #  Electronic end parenthesis.
            tokens.append(create_electronic_end_token(len(tokens), pos))
            pos += 1
        elif expression.startswith("e+", pos):
            #  Positive electronic flag.
            tokens.append(
                create_positive_electronic_flag_token(len(tokens), pos))
            pos += 2
        elif expression.startswith("e-", pos):
            #  Negative electronic flag.
            tokens.append(
                create_negative_electronic_flag_token(len(tokens), pos))
            pos += 2
        else:
            #  No rule matched the current character.
            raise _error(
                _ml_error.MOLECULE_UNRECOGNIZED_TOKEN,
                "parser.molecule.error.unrecognized_token.description",
                pos,
                pos,
                "parser.molecule.error.unrecognized_token.message")

    #  Add an end token (positioned right after the last character).
    tokens.append(create_end_token(len(tokens), length))

    return tokens
Exemple #8
0
def parse(expression, token_list, options, mexp_protected_header_enabled=False, mexp_protected_header_prefix="X"):
    """Turn a tokenized chemical equation into a chemical equation object.

    The tokens are consumed by a state machine. Each molecule token is handed
    to the molecule parser taken from the options, its prefix number is split
    off as the coefficient, and the molecule is appended to the left side
    (before the equal sign) or to the right side (after it).

    :type expression: str
    :type token_list: list[bce.parser.cexp.token.Token]
    :type options: bce.option.Option
    :type mexp_protected_header_enabled: bool
    :type mexp_protected_header_prefix: str
    :param expression: Origin chemical equation.
    :param token_list: The tokenized chemical equation (expected to be terminated by an end token).
    :param options: The options.
    :param mexp_protected_header_enabled: Whether the MEXP protected headers are enabled.
    :param mexp_protected_header_prefix: The prefix of the MEXP protected headers.
    :rtype : bce.parser.interface.cexp_parser.ChemicalEquation
    :return: The parsed chemical equation.
    :raise _cm_error.Error: Raise when the expression is empty, an operand is missing, a molecule can't be
                            parsed, the equal sign is duplicated, there is only one molecule or the equal
                            sign is missing in normal form.
    :raise RuntimeError: Raise when the state machine reaches an impossible situation.
    """

    #  Wrap the options.
    if_opt = _interface_opt.OptionWrapper(options)
    lang_id = _l10n_opt.OptionWrapper(options).get_language_id()

    def text(key):
        """Look up a localized message in the current language."""
        return _l10n_reg.get_message(lang_id, key)

    def make_error(code, description_key):
        """Create a parser error whose description is a localized message."""
        return _cm_error.Error(code, text(description_key), options)

    def last_index_of(tok):
        """Get the position of the last character of a token in the source text."""
        return tok.get_position() + len(tok.get_symbol()) - 1

    #  Keyword arguments shared by both calls into the molecule parser.
    header_kwargs = {
        "mexp_protected_header_enabled": mexp_protected_header_enabled,
        "mexp_protected_header_prefix": mexp_protected_header_prefix,
    }

    #  The equation that is being built.
    equation = _cexp_interface.ChemicalEquation()

    #  The operator of the molecule that will be read next.
    operator = _cexp_interface.OPERATOR_PLUS

    #  The form of the expression (unknown until the first operator / equal sign is read).
    form = None

    #  False while reading the left side, True after the equal sign.
    on_right_side = False

    #  State machine registers.
    state = _STATE_ROUTE_1
    after_molecule_state = None
    equal_sign_position = -1
    cursor = 0

    while True:
        token = token_list[cursor]

        if state == _STATE_ROUTE_1:
            #  Beginning of a side: the default operator is '+', an optional leading '-' may follow.
            operator = _cexp_interface.OPERATOR_PLUS
            if token.is_operator_minus():
                state = _STATE_READ_MINUS_1
            else:
                after_molecule_state = _STATE_ROUTE_2
                state = _STATE_READ_MOLECULE
            continue

        #  Operator states: each of them consumes one operator token and then expects a molecule.
        if state == _STATE_READ_MINUS_1 or state == _STATE_READ_MINUS_2:
            pending = (_FORM_NORMAL, _cexp_interface.OPERATOR_MINUS)
        elif state == _STATE_READ_PLUS:
            pending = (_FORM_NORMAL, _cexp_interface.OPERATOR_PLUS)
        elif state == _STATE_READ_SEPARATOR:
            #  The separator behaves like '+' but marks the auto-correction form.
            pending = (_FORM_AUTO_CORRECTION, _cexp_interface.OPERATOR_PLUS)
        else:
            pending = None

        if pending is not None:
            form = _macro_register_form(expression, form, pending[0], options)
            operator = pending[1]
            cursor += 1
            after_molecule_state = _STATE_ROUTE_2
            state = _STATE_READ_MOLECULE
            continue

        if state == _STATE_READ_MOLECULE:
            if not token.is_molecule():
                at_end = token.is_end()

                if at_end and cursor == 0:
                    #  Nothing but the end token: the expression is empty.
                    raise make_error(
                        _cexp_error.CEXP_EMPTY_EXPRESSION,
                        "parser.cexp.error.empty_expression.description"
                    )

                #  A molecule is missing. Point at the place where it should have been.
                failure = make_error(
                    _cexp_error.CEXP_NO_CONTENT,
                    "parser.cexp.error.no_content.description"
                )
                if at_end:
                    #  Missing between the previous token and the end of the expression.
                    failure.push_traceback(
                        expression,
                        token.get_position() - 1,
                        token.get_position() - 1,
                        text("parser.cexp.error.no_content.operator_after")
                    )
                elif cursor == 0:
                    #  Missing before the very first token.
                    failure.push_traceback(
                        expression,
                        token.get_position(),
                        last_index_of(token),
                        text("parser.cexp.error.no_content.operator_before")
                    )
                else:
                    #  Missing between the previous token and this one.
                    failure.push_traceback(
                        expression,
                        token.get_position() - 1,
                        last_index_of(token),
                        text("parser.cexp.error.no_content.operator_between")
                    )
                raise failure

            try:
                ml_parser = if_opt.get_molecule_parser()

                #  Build the AST of the molecule.
                ml_ast_root = ml_parser.parse_expression(
                    token.get_symbol(),
                    options,
                    **header_kwargs
                )

                #  Move the prefix number out of the AST, it becomes the coefficient.
                ml_coefficient = ml_ast_root.get_prefix_number()
                ml_ast_root.set_prefix_number(_math_cst.ONE)

                #  Count the atoms of the molecule.
                ml_atoms_dict = ml_parser.parse_ast(
                    token.get_symbol(),
                    ml_ast_root,
                    options,
                    **header_kwargs
                )

                #  Store the molecule on the side that is being read.
                append_item = equation.append_right_item if on_right_side else equation.append_left_item
                append_item(operator, ml_coefficient, ml_ast_root, ml_atoms_dict)
            except _cm_error.Error as err:
                #  Tell the user which molecule of the equation caused the error.
                err.push_traceback(
                    expression,
                    token.get_position(),
                    last_index_of(token),
                    text("parser.cexp.error.parsing_molecule.message")
                )
                raise err

            cursor += 1
            state = after_molecule_state
            continue

        if state == _STATE_ROUTE_2:
            #  After a molecule: decide by the kind of the following token.
            if token.is_operator_plus():
                state = _STATE_READ_PLUS
            elif token.is_operator_minus():
                state = _STATE_READ_MINUS_2
            elif token.is_operator_separator():
                state = _STATE_READ_SEPARATOR
            elif token.is_equal():
                state = _STATE_READ_EQUAL_SIGN
            elif token.is_end():
                break
            else:
                raise RuntimeError("BUG: Unexpected token (should never happen).")
            continue

        if state == _STATE_READ_EQUAL_SIGN:
            form = _macro_register_form(expression, form, _FORM_NORMAL, options)
            cursor += 1

            if on_right_side:
                #  An equal sign was already read: report both of them.
                failure = make_error(
                    _cexp_error.CEXP_DUPLICATED_EQUAL_SIGN,
                    "parser.cexp.error.duplicated_equal_sign.description"
                )
                failure.push_traceback(
                    expression,
                    token.get_position(),
                    last_index_of(token),
                    text("parser.cexp.error.duplicated_equal_sign.duplicated")
                )
                failure.push_traceback(
                    expression,
                    equal_sign_position,
                    equal_sign_position,
                    text("parser.cexp.error.duplicated_equal_sign.previous")
                )
                raise failure

            #  Remember the equal sign and switch to the right side.
            equal_sign_position = token.get_position()
            on_right_side = True
            state = _STATE_ROUTE_1
            continue

        raise RuntimeError("BUG: Unexpected state.")

    #  A single molecule is not an equation.
    if len(equation) == 1:
        failure = make_error(
            _cexp_error.CEXP_ONLY_ONE_MOLECULE,
            "parser.cexp.error.only_one_molecule.description"
        )
        failure.push_traceback(
            expression,
            0,
            len(expression) - 1,
            text("parser.cexp.error.only_one_molecule.message")
        )
        raise failure

    #  With more than one molecule, at least one operator / equal sign must have set the form.
    if form is None:
        raise RuntimeError("BUG: Form was not set.")

    #  The normal form requires an equal sign.
    if form == _FORM_NORMAL and not on_right_side:
        failure = make_error(
            _cexp_error.CEXP_NO_EQUAL_SIGN,
            "parser.cexp.error.no_equal_sign.description"
        )
        failure.push_traceback(
            expression,
            0,
            len(expression) - 1,
            text("parser.cexp.error.no_equal_sign.message")
        )
        raise failure

    return equation
Exemple #9
0
def balance_chemical_equation(cexp_object, options, unknown_header="X"):
    """Balance a chemical equation in place.

    The coefficients are solved from the model equations built from the
    chemical equation object, merged back into that object, checked and
    finally integerized.

    :type cexp_object: bce.parser.interface.cexp_parser.ChemicalEquation
    :type options: bce.option.Option
    :type unknown_header: str
    :param cexp_object: The chemical equation object (modified by this function).
    :param options: The options.
    :param unknown_header: The header of unknowns.
    :raise _cm_error.Error: Raise when a required balancer feature is disabled or a side is eliminated.
    """

    #  Wrap the options.
    lang_id = _l10n_opt.OptionWrapper(options).get_language_id()
    balancer_opt = _bce_option.OptionWrapper(options)

    def logic_error(code, message_key):
        """Create an error whose description is a localized message."""
        return _cm_error.Error(
            code, _l10n_reg.get_message(lang_id, message_key), options)

    #  Read the feature switches.
    error_correction_on = balancer_opt.is_error_correction_feature_enabled()
    auto_arranging_on = balancer_opt.is_auto_side_arranging_feature_enabled()

    #  An equation without right items is in auto-arranging form.
    auto_arranging_form = (cexp_object.get_right_item_count() == 0)
    if auto_arranging_form and not auto_arranging_on:
        raise logic_error(
            _bce_error.BALANCER_FEATURE_DISABLED,
            "logic.balancer.error.feature_disabled.auto_arranging")

    #  Build the model equations, solve them and derive the coefficients.
    model = _bce_model.build_model_equations(cexp_object)
    solution = _math_equation.solve_equations(model)
    balanced = _bce_model.generate_balanced_coefficients(
        solution, header=unknown_header)

    #  Write the coefficients back to the chemical equation.
    _bce_merger.merge_coefficients_with_cexp_object(cexp_object, balanced)

    #  Error correction drops the items whose coefficient is 0.
    if error_correction_on:
        cexp_object.remove_items_with_coefficient_zero()

    #  Items with negative coefficient belong to the other side.
    if auto_arranging_form or error_correction_on:
        cexp_object.move_items_with_negative_coefficient_to_another_side()

    #  Any zero / negative coefficient that is still there is a balancing error
    #  (left items are checked first, then right items).
    sides = (
        (cexp_object.get_left_item_count, cexp_object.get_left_item),
        (cexp_object.get_right_item_count, cexp_object.get_right_item),
    )
    for count_items, get_item in sides:
        for idx in range(count_items()):
            #  Simplify before checking.
            coefficient = get_item(idx).get_coefficient().simplify()

            if coefficient.is_negative or coefficient.is_zero:
                raise logic_error(
                    _bce_error.BALANCER_FEATURE_DISABLED,
                    "logic.balancer.error.feature_disabled.error_correction")

    #  Integerize the coefficients.
    cexp_object.coefficients_integerize()

    #  'All-eliminated' check.
    if len(cexp_object) == 0:
        raise logic_error(
            _bce_error.BALANCER_SIDE_ELIMINATED,
            "logic.balancer.error.side_eliminated.all")

    #  'Auto-arranging form with multiple answer' check.
    if auto_arranging_form and (cexp_object.get_left_item_count() == 0
                                or cexp_object.get_right_item_count() == 0):
        raise logic_error(
            _bce_error.BALANCER_SIDE_ELIMINATED,
            "logic.balancer.error.auto_arrange_with_multiple_answers.description")

    #  'Left side eliminated' check.
    if cexp_object.get_left_item_count() == 0:
        raise logic_error(
            _bce_error.BALANCER_SIDE_ELIMINATED,
            "logic.balancer.error.side_eliminated.left")

    #  'Right side eliminated' check.
    if cexp_object.get_right_item_count() == 0:
        raise logic_error(
            _bce_error.BALANCER_SIDE_ELIMINATED,
            "logic.balancer.error.side_eliminated.right")

    #  In auto-arranging form, flip the equation if the guessed direction is right-to-left.
    if auto_arranging_form:
        direction = _bce_direct.guess_reaction_direction(cexp_object)
        if direction == _bce_direct.GSD_RIGHT_TO_LEFT:
            cexp_object.flip()
Exemple #10
0
def tokenize(expression, options):
    """Split a math expression into tokens.

    Digit runs (with at most one decimal dot) become integer / float operand
    tokens, letter runs become function or symbol tokens, and operators,
    parentheses and the comma become one token each.

    :type expression: str
    :type options: bce.option.Option
    :param expression: The math expression.
    :param options: The options.
    :rtype : list[Token]
    :return: The token list.
    :raise bce.parser.common.error.Error: Raise when meet a parser error.
    """

    #  Initialize.
    tokens = []
    pos = 0
    total = len(expression)
    lang_id = _l10n_opt.OptionWrapper(options).get_language_id()

    while pos < total:
        ch = expression[pos]

        if ch.isdigit():
            #  Scan to the first character that is neither a digit nor a decimal dot.
            dot_pos = None
            scan = pos + 1
            while scan < total:
                scan_ch = expression[scan]
                if scan_ch == ".":
                    if dot_pos is not None:
                        #  A number can't contain two decimal dots.
                        err = _cm_error.Error(
                            _mexp_errors.MEXP_DUPLICATED_DECIMAL_DOT,
                            _l10n_reg.get_message(
                                lang_id,
                                "parser.mexp.error.duplicated_decimal_dot.description"
                            ), options)
                        err.push_traceback(
                            expression, scan, scan,
                            _l10n_reg.get_message(
                                lang_id,
                                "parser.mexp.error.duplicated_decimal_dot.duplicated_dot"
                            ))
                        err.push_traceback(
                            expression, dot_pos, dot_pos,
                            _l10n_reg.get_message(
                                lang_id,
                                "parser.mexp.error.duplicated_decimal_dot.previous_dot"
                            ))
                        raise err

                    dot_pos = scan
                elif not scan_ch.isdigit():
                    break

                scan += 1

            #  A decimal dot makes it a float, otherwise it is an integer.
            if dot_pos is not None:
                tokens.append(
                    create_float_operand_token(expression[pos:scan],
                                               len(tokens), pos))
            else:
                tokens.append(
                    create_integer_operand_token(expression[pos:scan],
                                                 len(tokens), pos))

            pos = scan
        elif ch.isalpha():
            #  Scan to the first non-alphabet character.
            scan = pos + 1
            while scan < total and expression[scan].isalpha():
                scan += 1

            #  A name directly followed by a digit or a left parenthesis is a function,
            #  any other name (including a name at the end) is a symbol.
            followed_by_argument = False
            if scan != total:
                follower = expression[scan]
                followed_by_argument = follower.isdigit() or follower in ("(", "[", "{")

            if followed_by_argument:
                tokens.append(
                    create_function_token(expression[pos:scan],
                                          len(tokens), pos))
            else:
                tokens.append(
                    create_symbol_operand_token(expression[pos:scan],
                                                len(tokens), pos))

            pos = scan
        elif ch == "+":
            tokens.append(create_plus_operator_token(len(tokens), pos))
            pos += 1
        elif ch == "-":
            #  '-' is the binary minus only if there is a left operand before it,
            #  otherwise it is the negative sign.
            previous = tokens[-1] if len(tokens) != 0 else None
            if previous is not None and (previous.is_operand()
                                         or previous.is_right_parenthesis()):
                tokens.append(create_minus_operator_token(len(tokens), pos))
            else:
                tokens.append(
                    create_negative_operator_token(len(tokens), pos))
            pos += 1
        elif ch == "*":
            tokens.append(create_multiply_operator_token(len(tokens), pos))
            pos += 1
        elif ch == "/":
            tokens.append(create_divide_operator_token(len(tokens), pos))
            pos += 1
        elif ch == "^":
            tokens.append(create_pow_operator_token(len(tokens), pos))
            pos += 1
        elif ch in ("(", "[", "{"):
            tokens.append(
                create_left_parenthesis_token(ch, len(tokens), pos))
            pos += 1
        elif ch in (")", "]", "}"):
            tokens.append(
                create_right_parenthesis_token(ch, len(tokens), pos))
            pos += 1
        elif ch == ",":
            tokens.append(create_separator_token(len(tokens), pos))
            pos += 1
        else:
            #  The character doesn't start any known token.
            err = _cm_error.Error(
                _mexp_errors.MEXP_UNRECOGNIZED_TOKEN,
                _l10n_reg.get_message(
                    lang_id, "parser.mexp.error.unrecognized_token.description"),
                options)
            err.push_traceback(
                expression, pos, pos,
                _l10n_reg.get_message(
                    lang_id, "parser.mexp.error.unrecognized_token.message"))
            raise err

    return tokens
Exemple #11
0
def main():
    """Run the BCE console shell.

    Parses the command line, configures the option object (language,
    balancer features, abbreviations) and then reads expressions from the
    input prompt until EOF, printing the balanced result or the error of
    each one. The process is always left through _sys.exit().
    """

    #  Initialize the localization module.
    _shell_l10n.setup_localization()

    #  Capture SIGINT signal.
    _signal.signal(_signal.SIGINT, exit_signal_handler)

    #  Create the option instance and its wrappers.
    option = _option.Option()
    l10n_option = _public_option.LocaleOptionWrapper(option)
    balancer_option = _public_option.BalancerOptionWrapper(option)

    def message(key, **kwargs):
        """Look up a localized message in the language that is active right now."""
        return _l10n_registry.get_message(
            l10n_option.get_language_id(), key, **kwargs)

    def die(key, **kwargs):
        """Print a localized error message and exit with status 1."""
        print(message(key, **kwargs))
        _sys.exit(1)

    #  Build the argument parser.
    arg_parser = _argparse.ArgumentParser(
        description=message("shell.console.command.header"))

    #  Switches: (flag, destination, value stored when the flag is given, help message key).
    #  The default is always the opposite of the stored value.
    switches = (
        ("--output-mathml", "output_mathml", True,
         "shell.console.command.output_mathml"),
        ("--disable-banner", "show_banner", False,
         "shell.console.command.disable_banner"),
        ("--disable-bundled-abbreviations", "use_bundled_abbreviations", False,
         "shell.console.command.disable_bundled_abbreviations"),
        ("--disable-error-correction", "enable_error_correction", False,
         "shell.console.command.disable_error_correction"),
        ("--disable-auto-arranging", "enable_auto_arranging", False,
         "shell.console.command.disable_auto_arranging"),
    )
    for flag, dest, const, help_key in switches:
        arg_parser.add_argument(flag,
                                dest=dest,
                                action="store_const",
                                const=const,
                                default=not const,
                                help=message(help_key))

    #  String options: (flag, destination, default value, help message key).
    string_options = (
        ("--unknown-header", "unknown_header", "X",
         "shell.console.command.unknown_header"),
        ("--load-abbreviations-file", "abbreviations_file", None,
         "shell.console.command.load_abbreviations_file"),
        ("--language", "language", l10n_option.get_language_id(),
         "shell.console.command.language"),
    )
    for flag, dest, default, help_key in string_options:
        arg_parser.add_argument(flag,
                                dest=dest,
                                action="store",
                                type=str,
                                default=default,
                                help=message(help_key))

    arg_parser.add_argument("--version",
                            dest="show_version",
                            action="store_const",
                            const=True,
                            default=False,
                            help=message("shell.console.command.show_version"))
    args = arg_parser.parse_args()

    #  Apply the language (messages looked up from now on use it).
    l10n_option.set_language_id(args.language)

    #  Enable / disable all balancer features.
    balancer_option.enable_error_correction_feature(
        args.enable_error_correction)
    balancer_option.enable_auto_side_arranging_feature(
        args.enable_auto_arranging)

    #  Get the software version.
    ver_major, ver_minor, ver_revision = _version.get_version()
    version_map = {
        "$1": str(ver_major),
        "$2": str(ver_minor),
        "$3": str(ver_revision)
    }

    #  Show the version and leave.
    if args.show_version:
        print(message("shell.console.application.version",
                      replace_map=version_map))
        _sys.exit(0)

    #  Show the banner (only if it is wanted and the input is a terminal).
    if args.show_banner and _sys.stdin.isatty():
        print(message("shell.console.application.banner",
                      replace_map=version_map))
        print(message("shell.console.application.copyright"))

    #  Get and check the unknown header.
    unknown_header = args.unknown_header
    assert isinstance(unknown_header, str)
    unknown_header = unknown_header.strip()
    if not unknown_header or not is_valid_unknown_header(unknown_header):
        die("shell.console.error.invalid_unknown_header.description")

    #  Start from the bundled abbreviations (a private copy) or from nothing.
    if args.use_bundled_abbreviations:
        abbreviations = _copy.deepcopy(
            _public_db.BUNDLED_ABBREVIATION_DATABASE)
    else:
        abbreviations = {}

    #  Load extra abbreviations file.
    arv_path = args.abbreviations_file
    if arv_path is not None:
        #  Read the file.
        arv_content = _utils_file_io.read_text_file(arv_path)
        if arv_content is None:
            die("shell.console.error.file_reading_error.description",
                replace_map={"$1": arv_path})

        #  The file must contain a JSON object.
        try:
            loaded = _json.loads(arv_content)
        except ValueError:
            die("shell.console.error.file_corrupted.description",
                replace_map={"$1": arv_path})
        if not isinstance(loaded, dict):
            die("shell.console.error.file_corrupted.description",
                replace_map={"$1": arv_path})

        for arv_name, arv_expression in loaded.items():
            #  Convert unicode to string in Python 2.
            if _utils_compatible.is_old_python():
                # noinspection PyUnresolvedReferences
                if isinstance(arv_name, unicode):
                    arv_name = str(arv_name)
                # noinspection PyUnresolvedReferences
                if isinstance(arv_expression, unicode):
                    arv_expression = str(arv_expression)

            #  Every expression must be a string made of allowed characters.
            if not isinstance(arv_expression, str) or \
                    not _utils_input_chk.check_input_expression_characters(arv_expression):
                die("shell.console.error.file_corrupted.description",
                    replace_map={"$1": arv_path})

            #  Save the abbreviation (overrides a bundled one with the same name).
            abbreviations[arv_name] = arv_expression

    #  Set abbreviations in the option object.
    _public_option.MoleculeParserOptionWrapper(
        option).set_abbreviation_mapping(abbreviations)

    while True:
        #  Input a chemical equation / expression (spaces are dropped), stop at EOF.
        try:
            expression = _utils_compatible.input_prompt(">> ").replace(" ", "")
        except EOFError:
            break

        #  Ignore zero-length expressions and comment lines.
        if not expression or expression.startswith("#"):
            continue

        #  Balance chemical equation / expression and print it out.
        try:
            printer_id = _public_printer.PRINTER_MATHML if args.output_mathml \
                else _public_printer.PRINTER_TEXT
            cb_ctx = {"symbols": set()}
            result = _public_api.balance_chemical_equation(
                expression,
                option,
                printer=printer_id,
                unknown_header=unknown_header,
                callback_after_balance=callback_after_balancing,
                callback_context=cb_ctx)
            assert isinstance(result, str)
            print(result)
        except (_public_exception.ParserErrorWrapper,
                _public_exception.LogicErrorWrapper) as err:
            #  These errors carry their own printable text.
            print(str(err))
        except _public_exception.InvalidCharacterException:
            print(message("shell.console.error.invalid_character.description"))

    #  Print an empty line.
    print("")

    _sys.exit(0)
Exemple #12
0
def tokenize(expression, options):
    """Split a chemical equation into a flat list of tokens.

    The characters "+", "-", ";" and "=" become operator / separator / equal
    tokens when they appear outside of any bracket. Every other maximal run
    of characters (including bracketed sub-expressions) becomes one molecule
    token. An end token is always appended as the last item.

    :type expression: str
    :type options: bce.option.Option
    :param expression: The chemical equation.
    :param options: The options.
    :rtype : list[Token]
    :return: The token list.
    :raise: The error object built by _cm_error.Error when a closing bracket
        has no opening partner, or when an opening bracket is never closed.
    """

    #  Language used to look up localized error messages.
    lang_id = _l10n_opt.OptionWrapper(options).get_language_id()

    #  Single-character operators and the factory that builds their token.
    operator_factories = {
        "+": create_operator_plus_token,
        "-": create_operator_minus_token,
        ";": create_operator_separator_token,
        "=": create_equal_token,
    }
    opening_brackets = ("(", "[", "{", "<")
    closing_brackets = (")", "]", "}", ">")

    tokens = []
    total = len(expression)
    position = 0

    while position < total:
        #  Operators outside of a molecule are one character wide.
        factory = operator_factories.get(expression[position])
        if factory is not None:
            tokens.append(factory(len(tokens), position))
            position += 1
            continue

        #  Otherwise scan a molecule. The stack holds the positions of the
        #  opening brackets that have not been closed yet.
        open_positions = _adt_stack.Stack()
        scan = position

        while scan < total:
            ch = expression[scan]

            if ch in opening_brackets:
                open_positions.push(scan)
            elif ch in closing_brackets:
                #  A closing bracket needs a pending opening bracket.
                if len(open_positions) == 0:
                    err = _cm_error.Error(
                        _ce_error.CEXP_PARENTHESIS_MISMATCH,
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.cexp.error.parenthesis_mismatch.description"
                        ), options)
                    err.push_traceback(
                        expression, scan, scan,
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.cexp.error.parenthesis_mismatch.left"))
                    raise err

                open_positions.pop()
            elif ch in operator_factories and len(open_positions) == 0:
                #  An operator outside of all brackets ends the molecule.
                break

            #  Every character that did not end the molecule belongs to it.
            scan += 1

        #  Report every opening bracket that was never closed.
        if len(open_positions) != 0:
            err = _cm_error.Error(
                _ce_error.CEXP_PARENTHESIS_MISMATCH,
                _l10n_reg.get_message(
                    lang_id,
                    "parser.cexp.error.parenthesis_mismatch.description"),
                options)

            while len(open_positions) != 0:
                unmatched = open_positions.pop()
                err.push_traceback(
                    expression, unmatched, unmatched,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.cexp.error.parenthesis_mismatch.right"))

            raise err

        #  The molecule symbol is exactly the scanned slice of the source.
        tokens.append(
            create_molecule_token(
                expression[position:scan], len(tokens), position))

        #  Resume tokenizing right after the molecule.
        position = scan

    #  Terminate the list with an end token.
    tokens.append(create_end_token(len(tokens), len(expression)))

    return tokens
Exemple #13
0
def generate_ast(expression, token_list, options):
    """Generate an AST from the token list.

    The token list is consumed by a state machine. The root state looks at
    the current token and dispatches to a dedicated state (atom,
    abbreviation, parenthesis, electronic descriptor, hydrate dot, prefix /
    suffix number, molecule status); each of those states consumes its
    tokens and returns to the root state. The loop stops when the root state
    sees an end token. Afterwards the ending positions are registered,
    unclosed parentheses are reported and hydrate groups that contain a
    single child are replaced by that child.

    :type expression: str
    :type token_list: list[bce.parser.molecule.token.Token]
    :type options: bce.option.Option
    :param expression: The origin expression.
    :param token_list: The token list.
    :param options: The options.
    :rtype : bce.parser.ast.molecule.ASTNodeHydrateGroup | bce.parser.ast.molecule.ASTNodeMolecule
    :return: The root node of the generated AST.
    :raise: The error object built by _cm_error.Error for unexpected tokens,
        mismatched parentheses, non-positive numbers and explicitly written
        numbers that evaluate to one.
    :raise RuntimeError: On internal inconsistencies (messages prefixed with "BUG:").
    """

    #  Get the language ID (used to look up localized error messages).
    lang_id = _l10n_opt.OptionWrapper(options).get_language_id()

    #  Initialize the molecule status container (stays None if no status token is met).
    molecule_status = None

    #  Initialize the state machine.
    state = _STATE_ROOT

    #  Generate initial AST (a hydrate group that contains one empty molecule).
    root = _ml_ast_base.ASTNodeHydrateGroup()
    node = _ml_ast_base.ASTNodeMolecule(root)
    root.append_child(node)

    #  Register the starting position.
    root.register_starting_position_in_source_text(0)
    node.register_starting_position_in_source_text(0)

    #  Initialize the token cursor.
    cursor = 0

    #  The loop is only left (without raising) when the root state meets an end token.
    while True:
        #  Get current token.
        token = token_list[cursor]

        if state == _STATE_ROOT:
            #  Find molecule in parent nodes and current node.
            while node is not None and not node.is_molecule():
                node = node.get_parent_node()
            if node is None:
                raise RuntimeError("BUG: Can't find molecule group.")

            #  Redirect by rules.
            #  An operand is treated as a prefix number only while the molecule has no child yet.
            if token.is_operand() and len(node) == 0:
                state = _STATE_PREFIX_NUMBER
            elif token.is_symbol():
                state = _STATE_ATOM
            elif token.is_abbreviation():
                state = _STATE_ABBREVIATION
            elif token.is_left_parenthesis():
                state = _STATE_LEFT_PARENTHESIS
            elif token.is_right_parenthesis():
                state = _STATE_RIGHT_PARENTHESIS
            elif token.is_electronic_begin():
                state = _STATE_ELECTRONIC
            elif token.is_hydrate_dot():
                state = _STATE_HYDRATE_DOT
            elif token.is_status():
                state = _STATE_MOLECULE_STATUS
            elif token.is_end():
                break
            else:
                #  Raise an error if the token can't be recognized.
                err = _cm_error.Error(
                    _ml_error.MOLECULE_UNEXPECTED_TOKEN,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.unexpected_token.description"),
                    options)
                err.push_traceback(
                    expression, token.get_position(),
                    token.get_position() + len(token.get_symbol()) - 1,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.unexpected_token.other"))
                raise err
        elif state == _STATE_ATOM:
            #  Create a new atom node and register its starting position.
            new_node = _ml_ast_base.ASTNodeAtom(token.get_symbol(), node)
            new_node.register_starting_position_in_source_text(
                token.get_position())

            #  Add the node to the molecule group.
            node.append_child(new_node)

            #  Switch the node pointer to the newly created node
            #  (the suffix-number state below operates on it).
            node = new_node

            #  Next token.
            cursor += 1

            #  Go to read the suffix number.
            state = _STATE_SUFFIX_NUMBER
        elif state == _STATE_ABBREVIATION:
            #  Create a new abbreviation node and register its starting position.
            #  The first and the last character of the token symbol are dropped
            #  (presumably the brackets that wrap the abbreviation name - TODO confirm).
            new_node = _ml_ast_base.ASTNodeAbbreviation(
                token.get_symbol()[1:-1], node)
            new_node.register_starting_position_in_source_text(
                token.get_position())

            #  Add the node to the molecule group.
            node.append_child(new_node)

            #  Switch the node pointer to the newly created node.
            node = new_node

            #  Next token.
            cursor += 1

            #  Go to read the suffix number.
            state = _STATE_SUFFIX_NUMBER
        elif state == _STATE_LEFT_PARENTHESIS:
            #  Create new nodes (parenthesis wrapper -> hydrate group -> molecule).
            new_hydrate_grp = _ml_ast_base.ASTNodeHydrateGroup()
            new_molecule = _ml_ast_base.ASTNodeMolecule(new_hydrate_grp)
            new_parenthesis = _ml_ast_base.ASTNodeParenthesisWrapper(
                new_hydrate_grp, node)

            #  Link them correctly and then add the newly created parenthesis node to the molecule group.
            new_hydrate_grp.set_parent_node(new_parenthesis)
            new_hydrate_grp.append_child(new_molecule)
            node.append_child(new_parenthesis)

            #  Switch the node pointer to the newly created molecule node.
            node = new_molecule

            #  Register their starting positions
            #  (the inner nodes start right after the parenthesis character).
            new_hydrate_grp.register_starting_position_in_source_text(
                token.get_position() + 1)
            new_molecule.register_starting_position_in_source_text(
                token.get_position() + 1)
            new_parenthesis.register_starting_position_in_source_text(
                token.get_position())

            #  Next token.
            cursor += 1

            #  Go to root state.
            state = _STATE_ROOT
        elif state == _STATE_RIGHT_PARENTHESIS:
            #  Find parenthesis node in parent nodes and current node.
            while node is not None and not node.is_parenthesis():
                #  Register the ending position of current working node
                #  (it ends right before the right parenthesis).
                node.register_ending_position_in_source_text(
                    token.get_position() - 1)

                #  Go to the parent node.
                node = node.get_parent_node()

            #  Raise an error if the node can't be found
            #  (no enclosing parenthesis node exists for this right parenthesis).
            if node is None:
                err = _cm_error.Error(
                    _ml_error.MOLECULE_PARENTHESIS_MISMATCH,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.parenthesis_mismatch.description"
                    ), options)
                err.push_traceback(
                    expression, token.get_position(),
                    token.get_position() + len(token.get_symbol()) - 1,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.parenthesis_mismatch.left"))
                raise err

            #  Save the position of the right parenthesis in the parenthesis node.
            node.set_right_parenthesis_position(token.get_position())

            #  Next token.
            cursor += 1

            #  Go to read the suffix number (applied to the parenthesis node).
            state = _STATE_SUFFIX_NUMBER
        elif state == _STATE_ELECTRONIC:
            #  Save the starting position of the electronic descriptor.
            e_start_pos = token.get_position()

            #  Next token.
            cursor += 1
            token = token_list[cursor]

            #  Try to read the prefix number.
            #  Consecutive operand tokens are multiplied together; the default is one.
            e_pfx = _math_cst.ONE
            e_pfx_start = token.get_position()
            has_e_pfx_number = False
            while token.is_operand():
                #  Mark the flag.
                has_e_pfx_number = True

                #  Process the prefix number.
                e_pfx *= token.get_operand_value().simplify()

                #  Next token.
                cursor += 1
                token = token_list[cursor]

            #  Simplify before checking.
            e_pfx = e_pfx.simplify()

            #  Domain check (the charge count must be neither negative nor zero).
            if e_pfx.is_negative or e_pfx.is_zero:
                err = _cm_error.Error(
                    _ml_error.MOLECULE_DOMAIN_ERROR,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.domain_error.description"),
                    options)
                err.push_traceback(
                    expression, e_pfx_start,
                    token.get_position() - 1,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.domain_error.electronic_charge")
                )
                raise err

            #  Validate (an explicitly written number that equals one is rejected).
            if has_e_pfx_number and e_pfx == _math_cst.ONE:
                err = _cm_error.Error(
                    _ml_error.MOLECULE_EXCEED_OPERAND,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.exceed_operand.description"),
                    options)
                err.push_traceback(
                    expression, e_pfx_start,
                    token.get_position() - 1,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.exceed_operand.electronic_charge"
                    ))
                raise err

            #  Process the electronic positivity flag.
            if token.is_electronic_positive_flag():
                pass
            elif token.is_electronic_negative_flag():
                #  A negative flag negates the charge count.
                e_pfx = -e_pfx
            else:
                if token.is_end():
                    #  The expression ended before the descriptor was closed.
                    err = _cm_error.Error(
                        _ml_error.MOLECULE_PARENTHESIS_MISMATCH,
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.molecule.error.parenthesis_mismatch.description"
                        ), options)
                    err.push_traceback(
                        expression, e_start_pos,
                        token.get_position() - 1,
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.molecule.error.parenthesis_mismatch.right")
                    )
                else:
                    #  Raise an error if current working token is not an electronic positivity flag.
                    err = _cm_error.Error(
                        _ml_error.MOLECULE_UNEXPECTED_TOKEN,
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.molecule.error.unexpected_token.description"
                        ), options)
                    err.push_traceback(
                        expression, token.get_position(),
                        token.get_position() + len(token.get_symbol()) - 1,
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.molecule.error.unexpected_token.electronic_suffix"
                        ))

                #  Raise whichever error was built above.
                raise err

            #  Next token.
            cursor += 1
            token = token_list[cursor]

            #  Raise an error if current working token is not '>'.
            if not token.is_electronic_end():
                if token.is_end():
                    #  The expression ended before the descriptor was closed.
                    err = _cm_error.Error(
                        _ml_error.MOLECULE_PARENTHESIS_MISMATCH,
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.molecule.error.parenthesis_mismatch.description"
                        ), options)
                    err.push_traceback(
                        expression, e_start_pos,
                        token.get_position() - 1,
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.molecule.error.parenthesis_mismatch.right")
                    )
                else:
                    err = _cm_error.Error(
                        _ml_error.MOLECULE_UNEXPECTED_TOKEN,
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.molecule.error.unexpected_token.description"
                        ), options)
                    err.push_traceback(
                        expression, token.get_position(),
                        token.get_position() + len(token.get_symbol()) - 1,
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.molecule.error.unexpected_token.electronic_end"
                        ))

                #  Raise whichever error was built above.
                raise err

            #  Next token.
            cursor += 1
            token = token_list[cursor]

            #  Raise an error if the electronic descriptor is not at the end of a molecule block
            #  (it must be followed by a right parenthesis, a hydrate dot, a status or the end token).
            if not (token.is_right_parenthesis() or token.is_hydrate_dot()
                    or token.is_end() or token.is_status()):
                err = _cm_error.Error(
                    _ml_error.MOLECULE_UNEXPECTED_TOKEN,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.unexpected_token.description"),
                    options)
                err.push_traceback(
                    expression, e_start_pos,
                    token.get_position() - 1,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.unexpected_token.electronic_misplaced"
                    ))
                raise err

            #  Set the electronic count.
            node.set_electronic_count(e_pfx)

            #  Go to root state (the cursor already points to the token after the descriptor).
            state = _STATE_ROOT
        elif state == _STATE_HYDRATE_DOT:
            #  Save the ending position of current working node.
            node.register_ending_position_in_source_text(token.get_position() -
                                                         1)

            #  Go to parent node (expected to be the enclosing hydrate group).
            node = node.get_parent_node()
            assert isinstance(node, _ml_ast_base.ASTNodeHydrateGroup)

            #  Create a new molecule node and set its starting position.
            new_molecule = _ml_ast_base.ASTNodeMolecule(node)
            new_molecule.register_starting_position_in_source_text(
                token.get_position() + 1)

            #  Add the newly created molecule node to the hydrate group node.
            node.append_child(new_molecule)

            #  Switch the node pointer to the newly created molecule node.
            node = new_molecule

            #  Next token.
            cursor += 1

            #  Go to root state.
            state = _STATE_ROOT
        elif state == _STATE_PREFIX_NUMBER:
            #  Save the starting position of the prefix.
            pfx_start = token.get_position()

            #  Read prefix numbers (consecutive operand tokens are multiplied together).
            has_pfx_number = False
            while token.is_operand():
                #  Mark the flag.
                has_pfx_number = True

                #  Process the prefix number.
                node.set_prefix_number(node.get_prefix_number() *
                                       token.get_operand_value().simplify())

                #  Next token.
                cursor += 1
                token = token_list[cursor]

            #  Simplify before checking.
            pfx = node.get_prefix_number().simplify()

            #  Domain check (the prefix must be neither negative nor zero).
            if pfx.is_negative or pfx.is_zero:
                err = _cm_error.Error(
                    _ml_error.MOLECULE_DOMAIN_ERROR,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.domain_error.description"),
                    options)
                err.push_traceback(
                    expression, pfx_start,
                    token.get_position() - 1,
                    _l10n_reg.get_message(
                        lang_id, "parser.molecule.error.domain_error.prefix"))
                raise err

            #  Validate (an explicitly written prefix that equals one is rejected).
            if has_pfx_number and pfx == _math_cst.ONE:
                err = _cm_error.Error(
                    _ml_error.MOLECULE_EXCEED_OPERAND,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.exceed_operand.description"),
                    options)
                err.push_traceback(
                    expression, pfx_start,
                    token.get_position() - 1,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.exceed_operand.prefix"))
                raise err

            #  Set the (simplified) prefix number.
            node.set_prefix_number(pfx)

            #  Go to root state.
            state = _STATE_ROOT
        elif state == _STATE_SUFFIX_NUMBER:
            #  Save the starting position of the suffix.
            sfx_start = token.get_position()

            #  Read suffix numbers (consecutive operand tokens are multiplied together).
            has_sfx_number = False
            while token.is_operand():
                #  Mark the flag.
                has_sfx_number = True

                #  Process the suffix number.
                node.set_suffix_number(node.get_suffix_number() *
                                       token.get_operand_value().simplify())

                #  Next token.
                cursor += 1
                token = token_list[cursor]

            #  Get the suffix.
            sfx = node.get_suffix_number()

            #  Simplify before checking (the simplified value is only used for
            #  the checks below; it is not written back to the node here).
            sfx = sfx.simplify()

            #  Domain check (the suffix must be neither negative nor zero).
            if sfx.is_negative or sfx.is_zero:
                err = _cm_error.Error(
                    _ml_error.MOLECULE_DOMAIN_ERROR,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.domain_error.description"),
                    options)
                err.push_traceback(
                    expression, sfx_start,
                    token.get_position() - 1,
                    _l10n_reg.get_message(
                        lang_id, "parser.molecule.error.domain_error.suffix"))
                raise err

            #  Validate (an explicitly written suffix that equals one is rejected).
            if has_sfx_number and sfx == _math_cst.ONE:
                err = _cm_error.Error(
                    _ml_error.MOLECULE_EXCEED_OPERAND,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.exceed_operand.description"),
                    options)
                err.push_traceback(
                    expression, sfx_start,
                    token.get_position() - 1,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.exceed_operand.suffix"))
                raise err

            #  Register the ending position of current working node
            #  (it ends right before the first token that is not part of the suffix).
            node.register_ending_position_in_source_text(token.get_position() -
                                                         1)

            #  Go to root state.
            state = _STATE_ROOT
        elif state == _STATE_MOLECULE_STATUS:
            #  Raise an error if the token is not at the end of the molecule
            #  (the status token must be directly followed by the end token).
            #  NOTE(review): The traceback reuses the "electronic_misplaced" message key - confirm that this is intended.
            if not token_list[cursor + 1].is_end():
                err = _cm_error.Error(
                    _ml_error.MOLECULE_UNEXPECTED_TOKEN,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.unexpected_token.description"),
                    options)
                err.push_traceback(
                    expression, token.get_position(),
                    token.get_position() + len(token.get_symbol()) - 1,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.unexpected_token.electronic_misplaced"
                    ))
                raise err

            #  Fetch the molecule status.
            if token.is_gas_status():
                molecule_status = _ml_ast_base.STATUS_GAS
            elif token.is_liquid_status():
                molecule_status = _ml_ast_base.STATUS_LIQUID
            elif token.is_solid_status():
                molecule_status = _ml_ast_base.STATUS_SOLID
            elif token.is_aqueous_status():
                molecule_status = _ml_ast_base.STATUS_AQUEOUS
            else:
                raise RuntimeError("BUG: Unrecognized status.")

            #  Next token.
            cursor += 1

            #  Go to root state.
            state = _STATE_ROOT
        else:
            raise RuntimeError("BUG: Unrecognized state.")

    #  Get the ending position (one before the position of the last token in the list).
    ending_pos = token_list[-1].get_position() - 1

    #  Initialize the parenthesis-mismatched flag.
    mismatch_flag = False

    #  Pre-create an error (only raised if an unclosed parenthesis node is found below).
    err = _cm_error.Error(
        _ml_error.MOLECULE_PARENTHESIS_MISMATCH,
        _l10n_reg.get_message(
            lang_id, "parser.molecule.error.parenthesis_mismatch.description"),
        options)

    #  Walk from the current working node up to the root.
    while node is not None:
        #  Register the ending position of current working node.
        node.register_ending_position_in_source_text(ending_pos)

        #  Mark the error flag and add an error description if current node is a parenthesis node
        #  (a parenthesis node on this path was never closed by a right parenthesis).
        if node.is_parenthesis():
            mismatch_flag = True
            err.push_traceback(
                expression, node.get_starting_position_in_source_text(),
                node.get_starting_position_in_source_text(),
                _l10n_reg.get_message(
                    lang_id,
                    "parser.molecule.error.parenthesis_mismatch.right"))

        #  Go to parent node.
        node = node.get_parent_node()

    #  Raise an error if we have met at least 1 parenthesis node.
    if mismatch_flag:
        raise err

    #  Now, we have constructed the whole AST, but we got a lot of useless hydrate group nodes.
    #  So we have to remove them (all hydrate group nodes which have only 1 child).

    #  Get iterate order.
    #  NOTE(review): The loop below looks up every child in 'unpacked' before its parent is
    #  processed, so this order presumably yields children before parents - confirm against do_bfs().
    unpack_order = _ml_ast_bfs.do_bfs(root, True)

    #  Initialize unpacked node container (maps id(original node) to its replacement node).
    unpacked = {}

    for node in unpack_order:
        if node.is_hydrate_group():
            assert isinstance(node, _ml_ast_base.ASTNodeHydrateGroup)

            if len(node) == 1:
                #  Get the child node and reset its parent
                #  (the single child takes the place of this hydrate group).
                child = unpacked[id(node[0])]
                child.set_parent_node(node.get_parent_node())

                #  Save the unpack result.
                unpacked[id(node)] = child
            else:
                #  Update children links.
                for child_id in range(0, len(node)):
                    node[child_id] = unpacked[id(node[child_id])]

                #  Save the unpack result.
                unpacked[id(node)] = node
        elif node.is_molecule():
            assert isinstance(node, _ml_ast_base.ASTNodeMolecule)

            #  Update children links.
            for child_id in range(0, len(node)):
                node[child_id] = unpacked[id(node[child_id])]

            #  Save the unpack result.
            unpacked[id(node)] = node
        elif node.is_parenthesis():
            assert isinstance(node, _ml_ast_base.ASTNodeParenthesisWrapper)

            #  Update children links.
            node.set_inner_node(unpacked[id(node.get_inner_node())])

            #  Save the unpack result.
            unpacked[id(node)] = node
        else:
            #  Any other node is kept as is. Save the unpack result.
            unpacked[id(node)] = node

    #  Set molecule status (the root itself may have been replaced by its single child).
    root = unpacked[id(root)]
    """:type : bce.parser.ast.molecule.ASTNodeHydrateGroup | bce.parser.ast.molecule.ASTNodeMolecule"""
    root.set_status(molecule_status)

    return root
Exemple #14
0
def parse_to_rpn(expression,
                 token_list,
                 options,
                 protected_header_enabled=False,
                 protected_header_prefix="X"):
    """Parse an infix math expression to RPN.

    The tokens are visited from left to right and handed to an _RPNProcessor.
    While doing so, this routine:

      - Inserts an implicit multiply operator between adjacent terms, such as
        "4x", "(3-y)x", "4pow(2,3)" and "(2+3)(4+2)".
      - Rejects a non-symbol operand placed directly after a right
        parenthesis, such as "(x-y)3".
      - Checks that parentheses are balanced and correctly paired.
      - Checks that argument separators only appear directly inside the
        parentheses of a function call, that no argument or parenthesis pair
        is empty, and that each function receives the argument count it
        requires.

    :type expression: str
    :type token_list: list[bce.parser.mexp.token.Token]
    :type options: bce.option.Option
    :type protected_header_enabled: bool
    :type protected_header_prefix: str
    :param expression: The infix math expression.
    :param token_list: The tokenized infix math expression.
    :param options: The options.
    :param protected_header_enabled: Whether the protected headers are enabled.
    :param protected_header_prefix: The prefix of the protected headers.
    :rtype : list[bce.parser.mexp.token.Token]
    :return: The RPN token list.
    :raise bce.parser.common.error.Error: Raise when a parser error occurred.
    :raise RuntimeError: Raise when an internal invariant is broken (a token of
                         unknown kind, or a function that passed the support
                         check but cannot be found again).
    """

    #  Initialize
    lang_id = _l10n_opt.OptionWrapper(options).get_language_id()
    rpn = _RPNProcessor()

    #  State of the innermost open parenthesis. It is saved on the stack when
    #  a left parenthesis opens and restored when the matching one closes.
    current_argc = 0
    required_argc = 0
    prev_separator_position = -1
    in_function = False

    parenthesis_mapping = {")": "(", "]": "[", "}": "{"}
    parenthesis_stack = _adt_stack.Stack()

    for token_id, token in enumerate(token_list):
        #  Get previous token (None for the first token).
        prev_tok = token_list[token_id - 1] if token_id != 0 else None

        if token.is_operand():
            if token.is_symbol_operand():
                #  Check the protected header.
                if protected_header_enabled and token.get_symbol().startswith(
                        protected_header_prefix):
                    err = _cm_error.Error(
                        _mexp_errors.MEXP_USE_PROTECTED_HEADER,
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.mexp.error.protected_header.description"),
                        options)
                    err.push_traceback(
                        expression,
                        token.get_position(),
                        token.get_position() + len(token.get_symbol()) - 1,
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.mexp.error.protected_header.message"),
                        replace_map={"$1": protected_header_prefix})
                    raise err

            if prev_tok is not None:
                if prev_tok.is_right_parenthesis():
                    if token.is_symbol_operand():
                        #  Do completion:
                        #    ([expr])[unknown] => ([expr])*[unknown]
                        #
                        #  For example:
                        #    (3-y)x => (3-y)*x
                        rpn.add_operator(
                            _mexp_token.create_multiply_operator_token())
                    else:
                        #  Numeric parenthesis suffix was not supported.
                        #
                        #  For example:
                        #    (x-y)3
                        #         ^
                        #         Requires a '*' before this token.
                        err = _cm_error.Error(
                            _mexp_errors.MEXP_MISSING_OPERATOR,
                            _l10n_reg.get_message(
                                lang_id,
                                "parser.mexp.error.missing_operator.description"
                            ), options)
                        err.push_traceback(
                            expression, token.get_position(),
                            token.get_position() + len(token.get_symbol()) - 1,
                            _l10n_reg.get_message(
                                lang_id,
                                "parser.mexp.error.missing_operator.multiply_before"
                            ))
                        raise err

                if prev_tok.is_operand():
                    #  Do completion:
                    #    [number][symbol] => [number]*[symbol]
                    #
                    #  For example:
                    #    4x => 4*x
                    rpn.add_operator(
                        _mexp_token.create_multiply_operator_token())

            #  Process the token.
            rpn.add_operand(token)
        elif token.is_function():
            #  Raise an error if the function is unsupported.
            if _mexp_functions.find_function(token.get_symbol()) is None:
                err = _cm_error.Error(
                    _mexp_errors.MEXP_FUNCTION_UNSUPPORTED,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.mexp.error.unsupported_function.description"),
                    options)
                err.push_traceback(
                    expression,
                    token.get_position(),
                    token.get_position() + len(token.get_symbol()) - 1,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.mexp.error.unsupported_function.message"),
                    replace_map={"$1": token.get_symbol()})
                raise err

            if prev_tok is not None and (prev_tok.is_operand()
                                         or prev_tok.is_right_parenthesis()):
                #  Do completion:
                #    [num][fn] => [num]*[fn]
                #
                #  For example:
                #    4pow(2,3) => 4*pow(2,3)
                rpn.add_operator(_mexp_token.create_multiply_operator_token())

            #  Process the token.
            rpn.add_function(token)
        elif token.is_operator():
            #  Get the operator.
            op = _mexp_operators.OPERATORS[token.get_subtype()]

            #  Check operands.
            if op.is_required_left_operand():
                _check_left_operand(expression, token_list, token_id, options)

            if op.is_required_right_operand():
                _check_right_operand(expression, token_list, token_id, options)

            #  Process the token.
            rpn.add_operator(token)
        elif token.is_left_parenthesis():
            #  Save state.
            parenthesis_stack.push(
                _ParenthesisStackItem(token.get_symbol(), token_id,
                                      in_function, current_argc, required_argc,
                                      prev_separator_position))

            #  Start a fresh state for the content of this parenthesis. The
            #  left parenthesis itself counts as the "previous separator", so
            #  that an immediately following ')' or ',' is seen as empty content.
            current_argc = 0
            prev_separator_position = token_id

            #  Set function state and get required argument count.
            if prev_tok is not None and prev_tok.is_function():
                #  Mark the flag.
                in_function = True

                #  Get the function object.
                fn_object = _mexp_functions.find_function(
                    prev_tok.get_symbol())
                if fn_object is None:
                    raise RuntimeError("BUG: Function object is None.")

                #  Get the required argument count.
                required_argc = fn_object.get_argument_count()
            else:
                #  Clear the flag.
                in_function = False
                required_argc = 0

            if prev_tok is not None and (prev_tok.is_right_parenthesis()
                                         or prev_tok.is_operand()):
                #  Do completion
                #    [lp][expr][rp][lp][expr][rp] => [lp][expr][rp]*[lp][expr][rp]
                #
                #  For example:
                #    (2+3)(4+2) => (2+3)*(4+2)
                rpn.add_operator(_mexp_token.create_multiply_operator_token())

            #  Process the token.
            rpn.add_left_parenthesis(token)
        elif token.is_right_parenthesis():
            #  Raise an error if there's no left parenthesis to be matched with.
            #
            #  This check has to run before the "no content" check below. When
            #  the very first token is a right parenthesis, the "no content"
            #  condition is also true (-1 + 1 == 0) but there is no previous
            #  token to report, so that check would fail on a None previous
            #  token instead of raising a parser error. Once the stack is known
            #  to be non-empty, a left parenthesis precedes this token and the
            #  previous token always exists.
            if len(parenthesis_stack) == 0:
                err = _cm_error.Error(
                    _mexp_errors.MEXP_PARENTHESIS_MISMATCH,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.mexp.error.parenthesis_mismatch.description"),
                    options)
                err.push_traceback(
                    expression, token.get_position(), token.get_position(),
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.mexp.error.parenthesis_mismatch.left"))
                raise err

            #  Raise an error if there's no content between two separators.
            if prev_separator_position + 1 == token_id:
                err = _cm_error.Error(
                    _mexp_errors.MEXP_NO_CONTENT,
                    _l10n_reg.get_message(
                        lang_id, "parser.mexp.error.no_content.description"),
                    options)
                if prev_tok.is_left_parenthesis():
                    err.push_traceback(
                        expression, prev_tok.get_position(),
                        token.get_position(),
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.mexp.error.no_content.in_parentheses"))
                else:
                    err.push_traceback(
                        expression, prev_tok.get_position(),
                        token.get_position(),
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.mexp.error.no_content.in_argument"))

                raise err

            #  Get the top item of the stack.
            p_item = parenthesis_stack.pop()

            #  Get the symbol of the parenthesis matches with current token.
            p_matched_sym = parenthesis_mapping[token.get_symbol()]

            #  Raise an error if the parenthesis was mismatched.
            if p_matched_sym != p_item.get_symbol():
                err = _cm_error.Error(
                    _mexp_errors.MEXP_PARENTHESIS_MISMATCH,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.mexp.error.parenthesis_mismatch.description"),
                    options)
                err.push_traceback(
                    expression,
                    token.get_position(),
                    token.get_position(),
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.mexp.error.parenthesis_mismatch.incorrect"),
                    replace_map={"$1": p_matched_sym})
                raise err

            if in_function:
                #  The last argument is terminated by the right parenthesis
                #  instead of a separator, so count it here.
                current_argc += 1

                #  Raise an error if the argument count was not matched.
                if current_argc != required_argc:
                    #  The function token sits right before its left parenthesis.
                    fn_token = token_list[p_item.get_token_id() - 1]

                    err = _cm_error.Error(
                        _mexp_errors.MEXP_FUNCTION_ARGUMENT_COUNT_MISMATCH,
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.mexp.error.argument_count_mismatch.description"
                        ), options)
                    err.push_traceback(
                        expression, fn_token.get_position(),
                        fn_token.get_position() + len(fn_token.get_symbol()) -
                        1,
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.mexp.error.argument_count_mismatch.message"
                        ), {
                            "$1": str(required_argc),
                            "$2": str(current_argc)
                        })
                    raise err

            #  Restore state.
            in_function = p_item.is_in_function()
            current_argc = p_item.get_current_argument_count()
            required_argc = p_item.get_required_argument_count()
            prev_separator_position = p_item.get_previous_separator_position()

            #  Process the token.
            rpn.add_right_parenthesis()
        elif token.is_separator():
            #  Raise an error if we're not in function now.
            if not in_function:
                err = _cm_error.Error(
                    _mexp_errors.MEXP_ILLEGAL_ARGUMENT_SEPARATOR,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.mexp.error.illegal_separator.description"),
                    options)
                err.push_traceback(
                    expression, token.get_position(), token.get_position(),
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.mexp.error.illegal_separator.message"))
                raise err

            #  Raise an error if there's no content between two separators.
            #  (Being in a function implies that a left parenthesis precedes
            #  this token, so the previous token always exists here.)
            if prev_separator_position + 1 == token_id:
                err = _cm_error.Error(
                    _mexp_errors.MEXP_NO_CONTENT,
                    _l10n_reg.get_message(
                        lang_id, "parser.mexp.error.no_content.description"),
                    options)
                err.push_traceback(
                    expression, prev_tok.get_position(), token.get_position(),
                    _l10n_reg.get_message(
                        lang_id, "parser.mexp.error.no_content.in_argument"))
                raise err

            #  Save separator position.
            prev_separator_position = token_id

            #  Increase argument counter.
            current_argc += 1

            #  Process the token.
            rpn.add_separator()
        else:
            raise RuntimeError("Never reach this condition.")

    #  Raise an error if there are still some left parentheses in the stack.
    if len(parenthesis_stack) != 0:
        err = _cm_error.Error(
            _mexp_errors.MEXP_PARENTHESIS_MISMATCH,
            _l10n_reg.get_message(
                lang_id, "parser.mexp.error.parenthesis_mismatch.description"),
            options)

        #  Report every unclosed left parenthesis in the same error.
        while len(parenthesis_stack) != 0:
            p_item = parenthesis_stack.pop()
            p_token = token_list[p_item.get_token_id()]
            err.push_traceback(
                expression, p_token.get_position(), p_token.get_position(),
                _l10n_reg.get_message(
                    lang_id, "parser.mexp.error.parenthesis_mismatch.right"))
        raise err

    #  Pop all items off from the stack and push them onto the RPN token list.
    rpn.finalize()

    #  Return the RPN token list.
    return rpn.get_rpn()