def to_string(self, left_margin=0, indent=4):
    """Present the error in a human-readable text.

    :type left_margin: int
    :type indent: int
    :param left_margin: The left margin value.
    :param indent: The indent value.
    :rtype : str
    :return: The formatted string.
    """
    # Resolve the language used for localized messages.
    lang_id = _l10n_opt.OptionWrapper(self.__opt).get_language_id()

    margin = " " * left_margin

    # Localized header line (contains the error code).
    header = margin + _l10n_reg.get_message(
        lang_id,
        "logic.common.error.header",
        replace_map={"$1": self.get_error_code()}
    )

    # Localized description title.
    description_title = margin + _l10n_reg.get_message(
        lang_id,
        "logic.common.error.description"
    )

    # Indented description body.
    description_body = " " * (left_margin + indent) + self.get_description()

    return header + "\n\n" + description_title + "\n\n" + description_body
def _macro_simplify(expression, mu_obj, node, options):
    """Macro for simplifying.

    :type expression: str
    :type mu_obj: MergeUtil
    :type node: bce.parser.ast.molecule._ASTNodeBaseML
    :type options: bce.option.Option
    :param expression: The origin expression.
    :param mu_obj: The MergeUtil object.
    :param node: The work node.
    :param options: The options.
    """
    # Get the language ID.
    lang_id = _l10n_opt.OptionWrapper(options).get_language_id()

    # Simplify and collect the symbols that got eliminated.
    removed = mu_obj.simplify()

    # Pre-create an atom-eliminated error.
    err = _cm_error.Error(
        _ml_error.MOLECULE_ELEMENT_ELIMINATED,
        _l10n_reg.get_message(
            lang_id,
            "parser.molecule.error.element_eliminated.description"
        ),
        options
    )

    # The electronic symbol ("e") is allowed to be eliminated; any other
    # eliminated symbol is an error.
    eliminated_atoms = [symbol for symbol in removed if symbol != "e"]

    # Attach one traceback entry per eliminated atom.
    for symbol in eliminated_atoms:
        err.push_traceback(
            expression,
            node.get_starting_position_in_source_text(),
            node.get_ending_position_in_source_text(),
            _l10n_reg.get_message(
                lang_id,
                "parser.molecule.error.element_eliminated.message",
                replace_map={
                    "$1": symbol
                }
            )
        )

    # Raise only when at least one non-electronic symbol was eliminated.
    if eliminated_atoms:
        raise err
def to_string(self, left_margin=0, indent=4):
    """Present the error in a human-readable form(string).

    :type left_margin: int
    :type indent: int
    :param left_margin: The left margin value.
    :param indent: The indent spaces count.
    :rtype : str
    :return: The formatted string.
    """
    # Get the language ID.
    lang_id = _l10n_opt.OptionWrapper(self.__opt).get_language_id()

    # Write header.
    s = " " * left_margin + _l10n_reg.get_message(
        lang_id,
        "parser.common.error.header",
        replace_map={
            "$1": self.get_error_code()
        }
    ) + "\n\n"

    # Write description.
    s += " " * left_margin + _l10n_reg.get_message(
        lang_id,
        "parser.common.error.description"
    ) + "\n\n"
    s += " " * (left_margin + indent) + self.__description

    # Write traceback items if have.
    if len(self.__traceback) != 0:
        # Write traceback header.
        s += "\n\n" + " " * left_margin + _l10n_reg.get_message(
            lang_id,
            "parser.common.error.traceback"
        )

        # Write the items from the last pushed to the first pushed one
        # (idiomatic reversed() instead of a manual reverse-index loop).
        for tb_item in reversed(self.__traceback):
            s += "\n\n" + tb_item.to_string(left_margin + indent, "^")

    return s
def _macro_register_form(expression, origin_form, new_form, options):
    """Macro of registering new form.

    :type expression: str
    :type origin_form: int
    :type new_form: int
    :type options: bce.option.Option
    :param expression: The chemical expression.
    :param origin_form: The origin form.
    :param new_form: The new form.
    :param options: The options.
    :rtype : int
    :return: The new form if no conflict exists.
    """
    # Get the language ID.
    lang_id = _l10n_opt.OptionWrapper(options).get_language_id()

    # Accept when no form has been registered yet, or when both forms agree.
    if origin_form is None or origin_form == new_form:
        return new_form

    # Otherwise the expression mixes two different forms; raise an error.
    err = _cm_error.Error(
        _cexp_error.CEXP_MIXED_FORM,
        _l10n_reg.get_message(
            lang_id,
            "parser.cexp.error.mixed_form.description"
        ),
        options
    )
    err.push_traceback(
        expression,
        0,
        len(expression) - 1,
        _l10n_reg.get_message(
            lang_id,
            "parser.cexp.error.mixed_form.message"
        )
    )
    raise err
def _check_right_operand(expression, token_list, token_id, options):
    """Check the right operand.

    :type expression: str
    :type token_list: list[bce.parser.mexp.token.Token]
    :type token_id: int
    :type options: bce.option.Option
    :param expression: (The same as the variable in parse_to_rpn() routine.)
    :param token_list: (The same as the variable in parse_to_rpn() routine.)
    :param token_id: (The same as the variable in parse_to_rpn() routine.)
    :param options: (The same as the variable in parse_to_rpn() routine.)
    :raise _cm_error.Error: Raise when there's no right operand.
    """
    # A valid right operand is an operand, a function or a left parenthesis
    # directly following the current token.
    if token_id + 1 != len(token_list):
        following = token_list[token_id + 1]
        if following.is_left_parenthesis() or following.is_operand() or following.is_function():
            return

    # Get the language ID.
    lang_id = _l10n_opt.OptionWrapper(options).get_language_id()

    # Build and raise a missing-operand error located at the current token.
    err_pos = token_list[token_id].get_position()
    err = _cm_error.Error(
        _mexp_errors.MEXP_MISSING_OPERAND,
        _l10n_reg.get_message(
            lang_id,
            "parser.mexp.error.missing_operand.description"),
        options)
    err.push_traceback(
        expression,
        err_pos,
        err_pos,
        _l10n_reg.get_message(lang_id, "parser.mexp.error.missing_operand.right"))
    raise err
def parse_ast(expression, root_node, options, mexp_protected_header_enabled=False, mexp_protected_header_prefix="X"):
    """Parse an AST.

    :type expression: str
    :type root_node: bce.parser.ast.molecule.ASTNodeHydrateGroup | bce.parser.ast.molecule.ASTNodeMolecule
    :type options: bce.option.Option
    :type mexp_protected_header_enabled: bool
    :type mexp_protected_header_prefix: str
    :param expression: The origin expression.
    :param root_node: The root node of the AST.
    :param options: The options.
    :param mexp_protected_header_enabled: Whether the MEXP protected headers are enabled.
    :param mexp_protected_header_prefix: The prefix of the MEXP protected headers.
    :rtype : dict
    :return: The parsed atoms dictionary.
    :raise _cm_error.Error: Raise when the molecule has no content, uses an
                            unsupported abbreviation, or a nested parse fails.
    """
    # Wrap the interface option.
    if_opt = _interface_opt.OptionWrapper(options)

    # Wrap the molecule option.
    molecule_opt = _ml_opt.OptionWrapper(options)

    # Get the language ID.
    lang_id = _l10n_opt.OptionWrapper(options).get_language_id()

    # Get the iteration order (BFS, so children are always visited before
    # their parent node).
    work_list = _ml_ast_bfs.do_bfs(root_node, True)

    # Initialize the parsed node container (keyed by node identity).
    parsed = {}
    """:type : dict[int, MergeUtil]"""

    # Iterate nodes from the leaves to the root.
    for work_node in work_list:
        if work_node.is_hydrate_group() or work_node.is_molecule():
            assert isinstance(work_node, _ast_base.ASTNodeHydrateGroup) or \
                isinstance(work_node, _ast_base.ASTNodeMolecule)

            # Get the prefix number.
            coeff = work_node.get_prefix_number()

            # Initialize a new merge utility.
            build = MergeUtil()

            # Process the electronics ("e" tracks the molecule's charge).
            if work_node.is_molecule():
                el_charge = work_node.get_electronic_count().simplify()
                if not el_charge.is_zero:
                    build.add("e", el_charge * coeff)

            # Iterate all children.
            for child_id in range(0, len(work_node)):
                # Get child node and its parsing result.
                child = work_node[child_id]
                child_parsed = parsed[id(child)]

                # Content check: a hydrate group must not contain an empty
                # molecule; the traceback text depends on where the empty
                # child sits (before / after / between the dots).
                if work_node.is_hydrate_group() and len(child_parsed) == 0:
                    assert isinstance(child, _ast_base.ASTNodeMolecule)
                    err = _cm_error.Error(
                        _ml_error.MOLECULE_NO_CONTENT,
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.molecule.error.no_content.description"
                        ),
                        options
                    )
                    if child_id == 0:
                        err.push_traceback(
                            expression,
                            child.get_ending_position_in_source_text() + 1,
                            child.get_ending_position_in_source_text() + 1,
                            _l10n_reg.get_message(
                                lang_id,
                                "parser.molecule.error.no_content.before"
                            )
                        )
                    elif child_id == len(work_node) - 1:
                        err.push_traceback(
                            expression,
                            child.get_starting_position_in_source_text() - 1,
                            child.get_starting_position_in_source_text() - 1,
                            _l10n_reg.get_message(
                                lang_id,
                                "parser.molecule.error.no_content.after"
                            )
                        )
                    else:
                        err.push_traceback(
                            expression,
                            child.get_starting_position_in_source_text() - 1,
                            child.get_ending_position_in_source_text() + 1,
                            _l10n_reg.get_message(
                                lang_id,
                                "parser.molecule.error.no_content.inside"
                            )
                        )
                    raise err

                # Merge.
                build.merge(child_parsed, coeff)

            # Do simplifying.
            _macro_simplify(expression, build, work_node, options)

            # Save the parsed result.
            parsed[id(work_node)] = build
        elif work_node.is_atom():
            assert isinstance(work_node, _ast_base.ASTNodeAtom)

            # Get suffix number.
            coeff = work_node.get_suffix_number()

            # Initialize a new merge utility.
            build = MergeUtil()

            # Add the atom.
            build.add(work_node.get_atom_symbol(), coeff)

            # Save the parsed result.
            parsed[id(work_node)] = build
        elif work_node.is_parenthesis():
            assert isinstance(work_node, _ast_base.ASTNodeParenthesisWrapper)

            # Get suffix number.
            coeff = work_node.get_suffix_number()

            # Initialize a new merge utility.
            build = MergeUtil()

            # Get inner node and its parsing result.
            inner_parsed = parsed[id(work_node.get_inner_node())]

            # Content check.
            if len(inner_parsed) == 0:
                err = _cm_error.Error(
                    _ml_error.MOLECULE_NO_CONTENT,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.no_content.description"
                    ),
                    options
                )
                err.push_traceback(
                    expression,
                    work_node.get_starting_position_in_source_text(),
                    work_node.get_ending_position_in_source_text(),
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.no_content.inside"
                    )
                )
                raise err

            # Merge.
            build.merge(inner_parsed, coeff)

            # Do simplifying.
            _macro_simplify(expression, build, work_node, options)

            # Save the parsed result.
            parsed[id(work_node)] = build
        elif work_node.is_abbreviation():
            assert isinstance(work_node, _ast_base.ASTNodeAbbreviation)

            # Get the abbreviation symbol.
            abbr_symbol = work_node.get_abbreviation_symbol()

            # Check symbol length.
            if len(abbr_symbol) == 0:
                err = _cm_error.Error(
                    _ml_error.MOLECULE_NO_CONTENT,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.no_content.description"
                    ),
                    options
                )
                err.push_traceback(
                    expression,
                    work_node.get_starting_position_in_source_text(),
                    work_node.get_ending_position_in_source_text(),
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.no_content.inside"
                    )
                )
                raise err

            # Get the abbreviation mapping.
            abbr_mapping = molecule_opt.get_abbreviation_mapping()

            # Check the existence.
            if abbr_symbol not in abbr_mapping:
                err = _cm_error.Error(
                    _ml_error.MOLECULE_UNSUPPORTED_ABBREVIATION,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.unsupported_abbreviation.description"
                    ),
                    options
                )
                err.push_traceback(
                    expression,
                    work_node.get_starting_position_in_source_text() + 1,
                    work_node.get_ending_position_in_source_text() - 1,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.unsupported_abbreviation.message"
                    )
                )
                raise err

            abbr_expression = abbr_mapping[abbr_symbol]

            # Recursively parse the abbreviation's expansion; on failure,
            # report both the expansion and its origin location.
            try:
                abbr_parser = if_opt.get_molecule_parser()
                abbr_ast_root = abbr_parser.parse_expression(
                    abbr_expression,
                    options,
                    mexp_protected_header_enabled=mexp_protected_header_enabled,
                    mexp_protected_header_prefix=mexp_protected_header_prefix
                )
                abbr_resolved = abbr_parser.parse_ast(
                    abbr_expression,
                    abbr_ast_root,
                    options,
                    mexp_protected_header_enabled=mexp_protected_header_enabled,
                    mexp_protected_header_prefix=mexp_protected_header_prefix
                )
            except _cm_error.Error as err:
                err.push_traceback(
                    abbr_expression,
                    0,
                    len(abbr_expression) - 1,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.parsing_abbreviation.expand"
                    )
                )
                err.push_traceback(
                    expression,
                    work_node.get_starting_position_in_source_text() + 1,
                    work_node.get_ending_position_in_source_text() - 1,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.parsing_abbreviation.origin"
                    )
                )
                raise err

            # Initialize a new merge utility.
            build = MergeUtil()

            # Get the suffix number.
            coeff = work_node.get_suffix_number()

            # Add atoms.
            for atom_symbol in abbr_resolved:
                build.add(atom_symbol, abbr_resolved[atom_symbol] * coeff)

            # Do simplifying.
            _macro_simplify(expression, build, work_node, options)

            # Save the parsed result.
            parsed[id(work_node)] = build
        else:
            raise RuntimeError("Never reach this condition.")

    # Get the parsing result of the root node.
    root_node_parsed = parsed[id(root_node)]

    # Content check.
    if len(root_node_parsed) == 0:
        err = _cm_error.Error(
            _ml_error.MOLECULE_NO_CONTENT,
            _l10n_reg.get_message(
                lang_id,
                "parser.molecule.error.no_content.description"
            ),
            options
        )
        err.push_traceback(
            expression,
            0,
            len(expression) - 1,
            _l10n_reg.get_message(
                lang_id,
                "parser.molecule.error.no_content.inside"
            )
        )
        raise err

    return root_node_parsed.get_data()
def tokenize(expression, options, mexp_protected_header_enabled=False, mexp_protected_header_prefix="X"):
    """Tokenize a molecule expression.

    :type expression: str
    :type options: bce.option.Option
    :type mexp_protected_header_enabled: bool
    :type mexp_protected_header_prefix: str
    :param expression: The expression.
    :param options: The options.
    :param mexp_protected_header_enabled: Whether the MEXP protected headers are enabled.
    :param mexp_protected_header_prefix: The prefix of the MEXP protected headers.
    :rtype : list[Token]
    :return: The token list.
    :raise bce.parser.common.error.Error: Raise when a parser error occurred.
    """
    # Initialize.
    lang_id = _l10n_opt.OptionWrapper(options).get_language_id()
    if_opt = _interface_opt.OptionWrapper(options)
    result = []
    cur_pos = 0
    end_pos = len(expression)
    while cur_pos < end_pos:
        cur_ch = expression[cur_pos]

        # Read a integer token if current character is a digit.
        if cur_ch.isdigit():
            # Search for the next non-digit character.
            search_pos = cur_pos + 1
            search_end = end_pos
            while search_pos < end_pos:
                search_ch = expression[search_pos]
                if not search_ch.isdigit():
                    search_end = search_pos
                    break

                # Go to next searching position.
                search_pos += 1

            # Create an integer token.
            result.append(
                create_integer_operand_token(expression[cur_pos:search_end], len(result), cur_pos))

            # Go to next position.
            cur_pos = search_end
            continue

        # Read an atom symbol if current character is a upper-case alphabet
        # (an atom symbol is one upper-case letter plus trailing lower-case letters).
        if cur_ch.isupper():
            # Search for next non-lower-case character.
            search_pos = cur_pos + 1
            search_end = end_pos
            while search_pos < end_pos:
                if not expression[search_pos].islower():
                    search_end = search_pos
                    break

                # Go to next searching position.
                search_pos += 1

            # Create a symbol token.
            result.append(
                create_symbol_token(expression[cur_pos:search_end], len(result), cur_pos))

            # Go to next position.
            cur_pos = search_end
            continue

        # Read a hydrate-dot token if current character is a dot.
        if cur_ch == ".":
            # Create a dot token.
            result.append(create_hydrate_dot_token(len(result), cur_pos))

            # Go to next position.
            cur_pos += 1
            continue

        # Read the "(g)" (gas) status descriptor.
        if expression.startswith("(g)", cur_pos):
            # Create a status descriptor token.
            result.append(create_gas_status_token(len(result), cur_pos))

            # Go to next position.
            cur_pos += 3
            continue

        # Read the "(l)" (liquid) status descriptor.
        if expression.startswith("(l)", cur_pos):
            # Create a status descriptor token.
            result.append(create_liquid_status_token(len(result), cur_pos))

            # Go to next position.
            cur_pos += 3
            continue

        # Read the "(s)" (solid) status descriptor.
        if expression.startswith("(s)", cur_pos):
            # Create a status descriptor token.
            result.append(create_solid_status_token(len(result), cur_pos))

            # Go to next position.
            cur_pos += 3
            continue

        # Read the "(aq)" (aqueous) status descriptor.
        if expression.startswith("(aq)", cur_pos):
            # Create a status descriptor token.
            result.append(create_aqueous_status_token(len(result), cur_pos))

            # Go to next position.
            cur_pos += 4
            continue

        # Read a normal left parenthesis if current character is '('.
        if cur_ch == "(":
            # Create a left parenthesis token.
            result.append(create_left_parenthesis_token(len(result), cur_pos))

            # Go to next position.
            cur_pos += 1
            continue

        # Read a normal right parenthesis if current character is ')'.
        if cur_ch == ")":
            # Create a right parenthesis token.
            result.append(create_right_parenthesis_token(len(result), cur_pos))

            # Go to next position.
            cur_pos += 1
            continue

        # Read an abbreviation if current character is '['.
        if cur_ch == "[":
            # Find the ']'.
            search_end = -1
            search_pos = cur_pos + 1
            while search_pos < end_pos:
                if expression[search_pos] == "]":
                    search_end = search_pos + 1
                    break

                # Go to next searching position.
                search_pos += 1

            # Raise an error if we can't find the ']'.
            if search_end == -1:
                err = _cm_error.Error(
                    _ml_error.MOLECULE_PARENTHESIS_MISMATCH,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.parenthesis_mismatch.description"
                    ),
                    options)
                err.push_traceback(
                    expression,
                    cur_pos,
                    cur_pos,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.parenthesis_mismatch.right"))
                raise err

            # Create an abbreviation token (the brackets are kept in the symbol).
            result.append(
                create_abbreviation_token(expression[cur_pos:search_end], len(result), cur_pos))

            # Go to next position.
            cur_pos = search_end
            continue

        # Read a math expression if current character is '{'.
        if cur_ch == "{":
            # Simulate a parenthesis stack to find the end '}'.
            p_mexp = 0

            # Searching the end '}'.
            search_end = -1
            search_pos = cur_pos + 1
            while search_pos < end_pos:
                search_ch = expression[search_pos]
                if search_ch == "(" or search_ch == "[" or search_ch == "{":
                    # If current character is a left parenthesis, push it onto the stack.
                    p_mexp += 1
                elif search_ch == ")" or search_ch == "]" or search_ch == "}":
                    # When we meet a right parenthesis and there's no left parenthesis in the stack.
                    # The parenthesis we met should be the end '}'.
                    if p_mexp == 0:
                        # Raise an error if the parenthesis isn't '}'.
                        if search_ch != "}":
                            err = _cm_error.Error(
                                _ml_error.MOLECULE_PARENTHESIS_MISMATCH,
                                _l10n_reg.get_message(
                                    lang_id,
                                    "parser.molecule.error.parenthesis_mismatch.description"
                                ),
                                options)
                            err.push_traceback(
                                expression,
                                search_pos,
                                search_pos,
                                _l10n_reg.get_message(
                                    lang_id,
                                    "parser.molecule.error.parenthesis_mismatch.incorrect",
                                    replace_map={"$1": "}"}))
                            raise err

                        # Set the end position.
                        search_end = search_pos + 1
                        break

                    # Pop the parenthesis off from the stack.
                    p_mexp -= 1
                else:
                    pass

                # Go to next searching position.
                search_pos += 1

            # Raise an error if we can't find the end '}'.
            if search_end == -1:
                err = _cm_error.Error(
                    _ml_error.MOLECULE_PARENTHESIS_MISMATCH,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.parenthesis_mismatch.description"
                    ),
                    options)
                err.push_traceback(
                    expression,
                    cur_pos,
                    cur_pos,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.parenthesis_mismatch.right"))
                raise err

            # Raise an error if the math expression has no content ("{}").
            if cur_pos + 2 == search_end:
                err = _cm_error.Error(
                    _ml_error.MOLECULE_NO_CONTENT,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.no_content.description"),
                    options)
                err.push_traceback(
                    expression,
                    cur_pos,
                    cur_pos + 1,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.no_content.inside"))
                raise err

            # Get the expression.
            mexp_expr = expression[cur_pos:search_end]

            # Evaluate the expression.
            try:
                ev_value = if_opt.get_mexp_parser().parse(
                    mexp_expr,
                    options,
                    protected_header_enabled=mexp_protected_header_enabled,
                    protected_header_prefix=mexp_protected_header_prefix)
            except _cm_error.Error as err:
                err.push_traceback(
                    expression,
                    cur_pos,
                    search_end - 1,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.parsing_mexp.message"))
                raise err

            # Create a math expression token.
            result.append(
                create_mexp_operand_token(mexp_expr, ev_value, len(result), cur_pos))

            # Go to next position.
            cur_pos = search_end
            continue

        # Read an electronic begin parenthesis if current character is '<'.
        if cur_ch == "<":
            # Create an electronic begin parenthesis token.
            result.append(create_electronic_begin_token(len(result), cur_pos))

            # Go to next position.
            cur_pos += 1
            continue

        # Read an electronic end parenthesis if current character is '>'.
        if cur_ch == ">":
            # Create an electronic end parenthesis token.
            result.append(create_electronic_end_token(len(result), cur_pos))

            # Go to next position.
            cur_pos += 1
            continue

        # Read a positive electronic flag ("e+").
        if expression.startswith("e+", cur_pos):
            # Create a positive electronic flag token.
            result.append(
                create_positive_electronic_flag_token(len(result), cur_pos))

            # Go to next position.
            cur_pos += 2
            continue

        # Read a negative electronic flag ("e-").
        if expression.startswith("e-", cur_pos):
            # Create a negative electronic flag token.
            result.append(
                create_negative_electronic_flag_token(len(result), cur_pos))

            # Go to next position.
            cur_pos += 2
            continue

        # Raise an error if current character can't be tokenized.
        err = _cm_error.Error(
            _ml_error.MOLECULE_UNRECOGNIZED_TOKEN,
            _l10n_reg.get_message(
                lang_id,
                "parser.molecule.error.unrecognized_token.description"),
            options)
        err.push_traceback(
            expression,
            cur_pos,
            cur_pos,
            _l10n_reg.get_message(
                lang_id,
                "parser.molecule.error.unrecognized_token.message"))
        raise err

    # Add an end token.
    result.append(create_end_token(len(result), len(expression)))

    return result
def parse(expression, token_list, options, mexp_protected_header_enabled=False, mexp_protected_header_prefix="X"):
    """Parse the tokenized chemical equation.

    :type expression: str
    :type token_list: list[bce.parser.cexp.token.Token]
    :type options: bce.option.Option
    :type mexp_protected_header_enabled: bool
    :type mexp_protected_header_prefix: str
    :param expression: Origin chemical equation.
    :param token_list: The tokenized chemical equation.
    :param options: The options.
    :param mexp_protected_header_enabled: Whether the MEXP protected headers are enabled.
    :param mexp_protected_header_prefix: The prefix of the MEXP protected headers.
    :rtype : bce.parser.interface.cexp_parser.ChemicalEquation
    :return: The parsed chemical equation.
    """
    # Wrap the interface option.
    if_opt = _interface_opt.OptionWrapper(options)

    # Get the language ID.
    lang_id = _l10n_opt.OptionWrapper(options).get_language_id()

    # Initialize an empty chemical equation.
    ret = _cexp_interface.ChemicalEquation()

    # Initialize the sign.
    operator = _cexp_interface.OPERATOR_PLUS

    # Initialize the form container.
    form = None

    # Initialize the side mark.
    # (side == False: Left side; side == True: Right side;)
    side = False

    # Initialize the state.
    state = _STATE_ROUTE_1

    # Initialize other variables.
    read_molecule_end = None
    equal_sign_position = -1

    # Initialize the token cursor.
    cursor = 0

    # Explicit state machine; each state consumes and/or redirects.
    while True:
        token = token_list[cursor]

        if state == _STATE_ROUTE_1:
            # Reset the operator to '+'.
            operator = _cexp_interface.OPERATOR_PLUS

            # Redirect by rules.
            if token.is_operator_minus():
                # Go to read the '-'.
                state = _STATE_READ_MINUS_1
            else:
                # Go and try to read a molecule.
                read_molecule_end = _STATE_ROUTE_2
                state = _STATE_READ_MOLECULE
        elif state == _STATE_READ_MINUS_1:
            # Register the new form.
            form = _macro_register_form(expression, form, _FORM_NORMAL, options)

            # Set the operator to '-'.
            operator = _cexp_interface.OPERATOR_MINUS

            # Next token.
            cursor += 1

            # Go to read-molecule state.
            read_molecule_end = _STATE_ROUTE_2
            state = _STATE_READ_MOLECULE
        elif state == _STATE_READ_MOLECULE:
            if not token.is_molecule():
                if token.is_end():
                    if cursor == 0:
                        # In this condition, we got an empty expression. Raise an error.
                        err = _cm_error.Error(
                            _cexp_error.CEXP_EMPTY_EXPRESSION,
                            _l10n_reg.get_message(
                                lang_id,
                                "parser.cexp.error.empty_expression.description"
                            ),
                            options
                        )
                        raise err
                    else:
                        # There is no content between the end token and previous token. Raise an error.
                        err = _cm_error.Error(
                            _cexp_error.CEXP_NO_CONTENT,
                            _l10n_reg.get_message(
                                lang_id,
                                "parser.cexp.error.no_content.description"
                            ),
                            options
                        )
                        err.push_traceback(
                            expression,
                            token.get_position() - 1,
                            token.get_position() - 1,
                            _l10n_reg.get_message(
                                lang_id,
                                "parser.cexp.error.no_content.operator_after"
                            )
                        )
                        raise err
                else:
                    err = _cm_error.Error(
                        _cexp_error.CEXP_NO_CONTENT,
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.cexp.error.no_content.description"
                        ),
                        options
                    )
                    if cursor == 0:
                        # There is no content before this token. Raise an error.
                        err.push_traceback(
                            expression,
                            token.get_position(),
                            token.get_position() + len(token.get_symbol()) - 1,
                            _l10n_reg.get_message(
                                lang_id,
                                "parser.cexp.error.no_content.operator_before"
                            )
                        )
                    else:
                        # There is no content between this token and previous token. Raise an error.
                        err.push_traceback(
                            expression,
                            token.get_position() - 1,
                            token.get_position() + len(token.get_symbol()) - 1,
                            _l10n_reg.get_message(
                                lang_id,
                                "parser.cexp.error.no_content.operator_between"
                            )
                        )
                    raise err

            try:
                # Get the molecule parser.
                ml_parser = if_opt.get_molecule_parser()

                # Parse the molecule.
                ml_ast_root = ml_parser.parse_expression(
                    token.get_symbol(),
                    options,
                    mexp_protected_header_enabled=mexp_protected_header_enabled,
                    mexp_protected_header_prefix=mexp_protected_header_prefix
                )

                # Separate the coefficient from the AST.
                ml_coefficient = ml_ast_root.get_prefix_number()
                ml_ast_root.set_prefix_number(_math_cst.ONE)

                # Parse the AST.
                ml_atoms_dict = ml_parser.parse_ast(
                    token.get_symbol(),
                    ml_ast_root,
                    options,
                    mexp_protected_header_enabled=mexp_protected_header_enabled,
                    mexp_protected_header_prefix=mexp_protected_header_prefix
                )

                # Add the molecule to the chemical equation.
                if side:
                    ret.append_right_item(operator, ml_coefficient, ml_ast_root, ml_atoms_dict)
                else:
                    ret.append_left_item(operator, ml_coefficient, ml_ast_root, ml_atoms_dict)
            except _cm_error.Error as err:
                # Add error description.
                err.push_traceback(
                    expression,
                    token.get_position(),
                    token.get_position() + len(token.get_symbol()) - 1,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.cexp.error.parsing_molecule.message"
                    )
                )
                raise err

            # Next token.
            cursor += 1

            # Redirect by pre-saved state.
            state = read_molecule_end
        elif state == _STATE_ROUTE_2:
            # Redirect by rules.
            if token.is_operator_plus():
                state = _STATE_READ_PLUS
            elif token.is_operator_minus():
                state = _STATE_READ_MINUS_2
            elif token.is_operator_separator():
                state = _STATE_READ_SEPARATOR
            elif token.is_equal():
                state = _STATE_READ_EQUAL_SIGN
            elif token.is_end():
                break
            else:
                raise RuntimeError("BUG: Unexpected token (should never happen).")
        elif state == _STATE_READ_PLUS:
            # Register the new form.
            form = _macro_register_form(expression, form, _FORM_NORMAL, options)

            # Set the operator to '+'.
            operator = _cexp_interface.OPERATOR_PLUS

            # Next token.
            cursor += 1

            # Go to read-molecule state.
            read_molecule_end = _STATE_ROUTE_2
            state = _STATE_READ_MOLECULE
        elif state == _STATE_READ_MINUS_2:
            # Register the new form.
            form = _macro_register_form(expression, form, _FORM_NORMAL, options)

            # Set the operator to '-'.
            operator = _cexp_interface.OPERATOR_MINUS

            # Next token.
            cursor += 1

            # Go to read-molecule state.
            read_molecule_end = _STATE_ROUTE_2
            state = _STATE_READ_MOLECULE
        elif state == _STATE_READ_SEPARATOR:
            # Register the new form.
            form = _macro_register_form(expression, form, _FORM_AUTO_CORRECTION, options)

            # Set the operator to '+'.
            operator = _cexp_interface.OPERATOR_PLUS

            # Next token.
            cursor += 1

            # Go to read-molecule state.
            read_molecule_end = _STATE_ROUTE_2
            state = _STATE_READ_MOLECULE
        elif state == _STATE_READ_EQUAL_SIGN:
            # Register the new form.
            form = _macro_register_form(expression, form, _FORM_NORMAL, options)

            # Next token.
            cursor += 1

            # Raise an error if the equal sign is duplicated.
            if side:
                err = _cm_error.Error(
                    _cexp_error.CEXP_DUPLICATED_EQUAL_SIGN,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.cexp.error.duplicated_equal_sign.description"
                    ),
                    options
                )
                err.push_traceback(
                    expression,
                    token.get_position(),
                    token.get_position() + len(token.get_symbol()) - 1,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.cexp.error.duplicated_equal_sign.duplicated"
                    )
                )
                err.push_traceback(
                    expression,
                    equal_sign_position,
                    equal_sign_position,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.cexp.error.duplicated_equal_sign.previous"
                    )
                )
                raise err

            # Save the position of the equal sign.
            equal_sign_position = token.get_position()

            # Mark the side flag.
            side = True

            # Go to route 1.
            state = _STATE_ROUTE_1
        else:
            raise RuntimeError("BUG: Unexpected state.")

    # Raise an error if there is only 1 molecule.
    if len(ret) == 1:
        err = _cm_error.Error(
            _cexp_error.CEXP_ONLY_ONE_MOLECULE,
            _l10n_reg.get_message(
                lang_id,
                "parser.cexp.error.only_one_molecule.description"
            ),
            options
        )
        err.push_traceback(
            expression,
            0,
            len(expression) - 1,
            _l10n_reg.get_message(
                lang_id,
                "parser.cexp.error.only_one_molecule.message"
            )
        )
        raise err

    # Check form.
    if form is None:
        raise RuntimeError("BUG: Form was not set.")

    # Raise an error if there is no equal sign (for normal form only).
    if form == _FORM_NORMAL and not side:
        err = _cm_error.Error(
            _cexp_error.CEXP_NO_EQUAL_SIGN,
            _l10n_reg.get_message(
                lang_id,
                "parser.cexp.error.no_equal_sign.description"
            ),
            options
        )
        err.push_traceback(
            expression,
            0,
            len(expression) - 1,
            _l10n_reg.get_message(
                lang_id,
                "parser.cexp.error.no_equal_sign.message"
            )
        )
        raise err

    return ret
def _check_balanced_coefficients(item_count, item_getter, lang_id, options):
    """Raise a balancer error if any item on one side has a zero or negative
    balanced coefficient (which would require the error-correction feature).

    :type item_count: int
    :type options: bce.option.Option
    :param item_count: The number of items on the side being checked.
    :param item_getter: Callable that maps an index to the side's item.
    :param lang_id: The language ID.
    :param options: The options.
    """
    for idx in range(0, item_count):
        # Get the coefficient and simplify before checking.
        coefficient = item_getter(idx).get_coefficient().simplify()

        # Check.
        if coefficient.is_negative or coefficient.is_zero:
            raise _cm_error.Error(
                _bce_error.BALANCER_FEATURE_DISABLED,
                _l10n_reg.get_message(
                    lang_id,
                    "logic.balancer.error.feature_disabled.error_correction"),
                options)


def balance_chemical_equation(cexp_object, options, unknown_header="X"):
    """Balance a chemical equation.

    :type cexp_object: bce.parser.interface.cexp_parser.ChemicalEquation
    :type options: bce.option.Option
    :type unknown_header: str
    :param cexp_object: The chemical equation object.
    :param options: The options.
    :param unknown_header: The header of unknowns.
    """
    # Get the language ID.
    lang_id = _l10n_opt.OptionWrapper(options).get_language_id()

    # Wrap the balancer options.
    balancer_opt = _bce_option.OptionWrapper(options)

    # Get enabled features.
    is_error_correction_enabled = balancer_opt.is_error_correction_feature_enabled()
    is_auto_arranging_enabled = balancer_opt.is_auto_side_arranging_feature_enabled()

    # Get whether the chemical equation is in auto-arranging form
    # (no right side given at all).
    is_auto_arranging_form = (cexp_object.get_right_item_count() == 0)

    # Raise an error if the chemical equation is in auto-arranging form without the feature enabled.
    if is_auto_arranging_form and not is_auto_arranging_enabled:
        raise _cm_error.Error(
            _bce_error.BALANCER_FEATURE_DISABLED,
            _l10n_reg.get_message(
                lang_id,
                "logic.balancer.error.feature_disabled.auto_arranging"),
            options)

    # Build a matrix and backup.
    equations = _bce_model.build_model_equations(cexp_object)

    # Solve the equation and check the answer.
    solved = _math_equation.solve_equations(equations)

    # Post solving.
    coefficients = _bce_model.generate_balanced_coefficients(
        solved, header=unknown_header)

    # Merge.
    _bce_merger.merge_coefficients_with_cexp_object(cexp_object, coefficients)

    # Remove items with coefficient 0.
    if is_error_correction_enabled:
        cexp_object.remove_items_with_coefficient_zero()

    # Move items that have negative coefficient to another side.
    if is_auto_arranging_form or is_error_correction_enabled:
        cexp_object.move_items_with_negative_coefficient_to_another_side()

    # Check balancing errors on both sides (factored into one helper; the
    # original code duplicated this loop for the left and right side).
    _check_balanced_coefficients(
        cexp_object.get_left_item_count(),
        cexp_object.get_left_item,
        lang_id,
        options)
    _check_balanced_coefficients(
        cexp_object.get_right_item_count(),
        cexp_object.get_right_item,
        lang_id,
        options)

    # Integerize the coefficients.
    cexp_object.coefficients_integerize()

    # 'All-eliminated' check.
    if len(cexp_object) == 0:
        raise _cm_error.Error(
            _bce_error.BALANCER_SIDE_ELIMINATED,
            _l10n_reg.get_message(lang_id,
                                  "logic.balancer.error.side_eliminated.all"),
            options)

    # 'Auto-arranging form with multiple answer' check.
    if is_auto_arranging_form and (cexp_object.get_left_item_count() == 0 or
                                   cexp_object.get_right_item_count() == 0):
        raise _cm_error.Error(
            _bce_error.BALANCER_SIDE_ELIMINATED,
            _l10n_reg.get_message(
                lang_id,
                "logic.balancer.error.auto_arrange_with_multiple_answers.description"
            ),
            options)

    # 'Left side eliminated' check.
    if cexp_object.get_left_item_count() == 0:
        raise _cm_error.Error(
            _bce_error.BALANCER_SIDE_ELIMINATED,
            _l10n_reg.get_message(lang_id,
                                  "logic.balancer.error.side_eliminated.left"),
            options)

    # 'Right side eliminated' check.
    if cexp_object.get_right_item_count() == 0:
        raise _cm_error.Error(
            _bce_error.BALANCER_SIDE_ELIMINATED,
            _l10n_reg.get_message(
                lang_id, "logic.balancer.error.side_eliminated.right"),
            options)

    # Guess direction if the form is auto-correction; flip when the reaction
    # appears to run right-to-left.
    if is_auto_arranging_form and _bce_direct.guess_reaction_direction(
            cexp_object) == _bce_direct.GSD_RIGHT_TO_LEFT:
        cexp_object.flip()
def tokenize(expression, options):
    """Tokenize a math expression.

    :type expression: str
    :type options: bce.option.Option
    :param expression: The math expression.
    :param options: The options.
    :rtype : list[Token]
    :return: The token list.
    :raise bce.parser.common.error.Error: Raise when meet a parser error.
    """

    #  Fetch the language ID for localized error messages.
    lang_id = _l10n_opt.OptionWrapper(options).get_language_id()

    tokens = []
    pos = 0
    expr_len = len(expression)

    while pos < expr_len:
        ch = expression[pos]

        #  The most recently emitted token (None before the first one).
        last_tok = tokens[-1] if len(tokens) != 0 else None

        if ch.isdigit():
            #  Scan the whole numeric sequence: digits plus at most one decimal dot.
            dot_pos = -1
            scan = pos + 1
            seq_end = expr_len
            while scan < expr_len:
                scan_ch = expression[scan]
                if scan_ch == ".":
                    if dot_pos != -1:
                        #  A second decimal dot inside one number is illegal.
                        err = _cm_error.Error(
                            _mexp_errors.MEXP_DUPLICATED_DECIMAL_DOT,
                            _l10n_reg.get_message(
                                lang_id,
                                "parser.mexp.error.duplicated_decimal_dot.description"
                            ),
                            options)
                        err.push_traceback(
                            expression,
                            scan,
                            scan,
                            _l10n_reg.get_message(
                                lang_id,
                                "parser.mexp.error.duplicated_decimal_dot.duplicated_dot"
                            ))
                        err.push_traceback(
                            expression,
                            dot_pos,
                            dot_pos,
                            _l10n_reg.get_message(
                                lang_id,
                                "parser.mexp.error.duplicated_decimal_dot.previous_dot"
                            ))
                        raise err
                    dot_pos = scan
                elif not scan_ch.isdigit():
                    seq_end = scan
                    break
                scan += 1

            literal = expression[pos:seq_end]
            if dot_pos != -1:
                #  A decimal dot was present, so this is a float operand.
                tokens.append(create_float_operand_token(literal, len(tokens), pos))
            else:
                #  No decimal dot, so this is an integer operand.
                tokens.append(create_integer_operand_token(literal, len(tokens), pos))

            pos = seq_end
        elif ch.isalpha():
            #  Scan the whole alphabetic sequence.
            scan = pos + 1
            seq_end = expr_len
            while scan < expr_len:
                if not expression[scan].isalpha():
                    seq_end = scan
                    break
                scan += 1

            literal = expression[pos:seq_end]
            if seq_end != expr_len and \
                    (expression[seq_end].isdigit() or expression[seq_end] in ("(", "[", "{")):
                #  A number or an opening parenthesis follows, so treat the
                #  identifier as a function name.
                tokens.append(create_function_token(literal, len(tokens), pos))
            else:
                #  Otherwise it is a symbol operand.
                tokens.append(create_symbol_operand_token(literal, len(tokens), pos))

            pos = seq_end
        elif ch == "+":
            tokens.append(create_plus_operator_token(len(tokens), pos))
            pos += 1
        elif ch == "-":
            #  With a left operand (operand or ')') present this is a binary
            #  minus; otherwise it is a unary negative sign.
            if last_tok is not None and (last_tok.is_operand() or last_tok.is_right_parenthesis()):
                tokens.append(create_minus_operator_token(len(tokens), pos))
            else:
                tokens.append(create_negative_operator_token(len(tokens), pos))
            pos += 1
        elif ch == "*":
            tokens.append(create_multiply_operator_token(len(tokens), pos))
            pos += 1
        elif ch == "/":
            tokens.append(create_divide_operator_token(len(tokens), pos))
            pos += 1
        elif ch == "^":
            tokens.append(create_pow_operator_token(len(tokens), pos))
            pos += 1
        elif ch in ("(", "[", "{"):
            tokens.append(create_left_parenthesis_token(ch, len(tokens), pos))
            pos += 1
        elif ch in (")", "]", "}"):
            tokens.append(create_right_parenthesis_token(ch, len(tokens), pos))
            pos += 1
        elif ch == ",":
            tokens.append(create_separator_token(len(tokens), pos))
            pos += 1
        else:
            #  No rule matched the character; raise an untokenizable error.
            err = _cm_error.Error(
                _mexp_errors.MEXP_UNRECOGNIZED_TOKEN,
                _l10n_reg.get_message(
                    lang_id,
                    "parser.mexp.error.unrecognized_token.description"),
                options)
            err.push_traceback(
                expression,
                pos,
                pos,
                _l10n_reg.get_message(
                    lang_id,
                    "parser.mexp.error.unrecognized_token.message"))
            raise err

    return tokens
def tokenize(expression, options):
    """Tokenize a chemical equation.

    :type expression: str
    :type options: bce.option.Option
    :param expression: The chemical equation.
    :param options: The options.
    :rtype : list[Token]
    :return: The token list.
    """

    #  Fetch the language ID for localized error messages.
    lang_id = _l10n_opt.OptionWrapper(options).get_language_id()

    tokens = []
    pos = 0
    expr_len = len(expression)

    while pos < expr_len:
        ch = expression[pos]

        if ch == "+":
            tokens.append(create_operator_plus_token(len(tokens), pos))
            pos += 1
        elif ch == "-":
            tokens.append(create_operator_minus_token(len(tokens), pos))
            pos += 1
        elif ch == ";":
            tokens.append(create_operator_separator_token(len(tokens), pos))
            pos += 1
        elif ch == "=":
            tokens.append(create_equal_token(len(tokens), pos))
            pos += 1
        else:
            #  Anything else begins a molecule.  Scan forward until an operator
            #  character that sits outside of all parentheses.
            open_stack = _adt_stack.Stack()
            scan = pos
            while scan < expr_len:
                scan_ch = expression[scan]
                if scan_ch in ("(", "[", "{", "<"):
                    #  Remember where the parenthesis was opened.
                    open_stack.push(scan)
                elif scan_ch in (")", "]", "}", ">"):
                    #  A closing parenthesis with no opening partner is an error.
                    if len(open_stack) == 0:
                        err = _cm_error.Error(
                            _ce_error.CEXP_PARENTHESIS_MISMATCH,
                            _l10n_reg.get_message(
                                lang_id,
                                "parser.cexp.error.parenthesis_mismatch.description"
                            ),
                            options)
                        err.push_traceback(
                            expression,
                            scan,
                            scan,
                            _l10n_reg.get_message(
                                lang_id,
                                "parser.cexp.error.parenthesis_mismatch.left"))
                        raise err
                    open_stack.pop()
                elif scan_ch in ("+", "-", ";", "=") and len(open_stack) == 0:
                    #  A top-level operator terminates the molecule.
                    break
                scan += 1

            #  Any parenthesis still open at the end of the scan is unmatched.
            if len(open_stack) != 0:
                err = _cm_error.Error(
                    _ce_error.CEXP_PARENTHESIS_MISMATCH,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.cexp.error.parenthesis_mismatch.description"),
                    options)
                while len(open_stack) != 0:
                    unclosed_pos = open_stack.pop()
                    err.push_traceback(
                        expression,
                        unclosed_pos,
                        unclosed_pos,
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.cexp.error.parenthesis_mismatch.right"))
                raise err

            #  The scanned span is the molecule symbol.
            tokens.append(create_molecule_token(expression[pos:scan], len(tokens), pos))
            pos = scan

    #  Terminate the stream with an end token.
    tokens.append(create_end_token(len(tokens), expr_len))

    return tokens
def generate_ast(expression, token_list, options):
    """Generate an AST from the token list.

    The parser is a state machine: each ``_STATE_*`` branch consumes zero or
    more tokens and hands control back to ``_STATE_ROOT``, which dispatches on
    the next token.  After the token stream is exhausted the provisional tree
    is post-processed to drop single-child hydrate-group wrapper nodes.

    :type expression: str
    :type token_list: list[bce.parser.molecule.token.Token]
    :type options: bce.option.Option
    :param expression: The origin expression.
    :param token_list: The token list.
    :param options: The options.
    :rtype : bce.parser.ast.molecule.ASTNodeHydrateGroup | bce.parser.ast.molecule.ASTNodeMolecule
    :return: The root node of the generated AST.
    """

    #  Get the language ID.
    lang_id = _l10n_opt.OptionWrapper(options).get_language_id()

    #  Initialize the molecule status container.
    molecule_status = None

    #  Initialize the state machine.
    state = _STATE_ROOT

    #  Generate initial AST (a hydrate group wrapping one empty molecule).
    root = _ml_ast_base.ASTNodeHydrateGroup()
    node = _ml_ast_base.ASTNodeMolecule(root)
    root.append_child(node)

    #  Register the starting position.
    root.register_starting_position_in_source_text(0)
    node.register_starting_position_in_source_text(0)

    #  Initialize the token cursor.
    cursor = 0
    while True:
        #  Get current token.
        token = token_list[cursor]

        if state == _STATE_ROOT:
            #  Find molecule in parent nodes and current node.
            while node is not None and not node.is_molecule():
                node = node.get_parent_node()
            if node is None:
                raise RuntimeError("BUG: Can't find molecule group.")

            #  Redirect by rules.  An operand only starts a prefix number when
            #  the current molecule is still empty.
            if token.is_operand() and len(node) == 0:
                state = _STATE_PREFIX_NUMBER
            elif token.is_symbol():
                state = _STATE_ATOM
            elif token.is_abbreviation():
                state = _STATE_ABBREVIATION
            elif token.is_left_parenthesis():
                state = _STATE_LEFT_PARENTHESIS
            elif token.is_right_parenthesis():
                state = _STATE_RIGHT_PARENTHESIS
            elif token.is_electronic_begin():
                state = _STATE_ELECTRONIC
            elif token.is_hydrate_dot():
                state = _STATE_HYDRATE_DOT
            elif token.is_status():
                state = _STATE_MOLECULE_STATUS
            elif token.is_end():
                break
            else:
                #  Raise an error if the token can't be recognized.
                err = _cm_error.Error(
                    _ml_error.MOLECULE_UNEXPECTED_TOKEN,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.unexpected_token.description"),
                    options)
                err.push_traceback(
                    expression,
                    token.get_position(),
                    token.get_position() + len(token.get_symbol()) - 1,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.unexpected_token.other"))
                raise err
        elif state == _STATE_ATOM:
            #  Create a new atom node and register its starting position.
            new_node = _ml_ast_base.ASTNodeAtom(token.get_symbol(), node)
            new_node.register_starting_position_in_source_text(
                token.get_position())

            #  Add the node to the molecule group.
            node.append_child(new_node)

            #  Switch the node pointer to the new created node.
            node = new_node

            #  Next token.
            cursor += 1

            #  Go to read the suffix number.
            state = _STATE_SUFFIX_NUMBER
        elif state == _STATE_ABBREVIATION:
            #  Create a new abbreviation node and register its starting
            #  position.  The symbol is stripped of its surrounding brackets.
            new_node = _ml_ast_base.ASTNodeAbbreviation(
                token.get_symbol()[1:-1], node)
            new_node.register_starting_position_in_source_text(
                token.get_position())

            #  Add the node to the molecule group.
            node.append_child(new_node)

            #  Switch the node pointer to the new created node.
            node = new_node

            #  Next token.
            cursor += 1

            #  Go to read the suffix number.
            state = _STATE_SUFFIX_NUMBER
        elif state == _STATE_LEFT_PARENTHESIS:
            #  Create new nodes.
            new_hydrate_grp = _ml_ast_base.ASTNodeHydrateGroup()
            new_molecule = _ml_ast_base.ASTNodeMolecule(new_hydrate_grp)
            new_parenthesis = _ml_ast_base.ASTNodeParenthesisWrapper(
                new_hydrate_grp, node)

            #  Link them correctly and then add the new created parenthesis
            #  node to the molecule group.
            new_hydrate_grp.set_parent_node(new_parenthesis)
            new_hydrate_grp.append_child(new_molecule)
            node.append_child(new_parenthesis)

            #  Switch the node pointer to the new created molecule node.
            node = new_molecule

            #  Register their starting positions (inner nodes start just after
            #  the parenthesis character).
            new_hydrate_grp.register_starting_position_in_source_text(
                token.get_position() + 1)
            new_molecule.register_starting_position_in_source_text(
                token.get_position() + 1)
            new_parenthesis.register_starting_position_in_source_text(
                token.get_position())

            #  Next token.
            cursor += 1

            #  Go to root state.
            state = _STATE_ROOT
        elif state == _STATE_RIGHT_PARENTHESIS:
            #  Find parenthesis node in parent nodes and current node.
            while node is not None and not node.is_parenthesis():
                #  Register the ending position of current working node.
                node.register_ending_position_in_source_text(
                    token.get_position() - 1)

                #  Go to the parent node.
                node = node.get_parent_node()

            #  Raise an error if the node can't be found.
            if node is None:
                err = _cm_error.Error(
                    _ml_error.MOLECULE_PARENTHESIS_MISMATCH,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.parenthesis_mismatch.description"
                    ),
                    options)
                err.push_traceback(
                    expression,
                    token.get_position(),
                    token.get_position() + len(token.get_symbol()) - 1,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.parenthesis_mismatch.left"))
                raise err

            #  Record the right parenthesis position on the parenthesis node.
            node.set_right_parenthesis_position(token.get_position())

            #  Next token.
            cursor += 1

            #  Go to read the suffix number.
            state = _STATE_SUFFIX_NUMBER
        elif state == _STATE_ELECTRONIC:
            #  Save the starting position of the electronic descriptor.
            e_start_pos = token.get_position()

            #  Next token.
            cursor += 1
            token = token_list[cursor]

            #  Try to read the prefix number (product of consecutive operands).
            e_pfx = _math_cst.ONE
            e_pfx_start = token.get_position()
            has_e_pfx_number = False
            while token.is_operand():
                #  Mark the flag.
                has_e_pfx_number = True

                #  Process the prefix number.
                e_pfx *= token.get_operand_value().simplify()

                #  Next token.
                cursor += 1
                token = token_list[cursor]

            #  Simplify before checking.
            e_pfx = e_pfx.simplify()

            #  Domain check: the charge count must be positive.
            if e_pfx.is_negative or e_pfx.is_zero:
                err = _cm_error.Error(
                    _ml_error.MOLECULE_DOMAIN_ERROR,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.domain_error.description"),
                    options)
                err.push_traceback(
                    expression,
                    e_pfx_start,
                    token.get_position() - 1,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.domain_error.electronic_charge")
                )
                raise err

            #  Validate: an explicitly written charge count of exactly 1 is
            #  redundant and rejected.
            if has_e_pfx_number and e_pfx == _math_cst.ONE:
                err = _cm_error.Error(
                    _ml_error.MOLECULE_EXCEED_OPERAND,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.exceed_operand.description"),
                    options)
                err.push_traceback(
                    expression,
                    e_pfx_start,
                    token.get_position() - 1,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.exceed_operand.electronic_charge"
                    ))
                raise err

            #  Process the electronic positivity flag.
            if token.is_electronic_positive_flag():
                pass
            elif token.is_electronic_negative_flag():
                e_pfx = -e_pfx
            else:
                if token.is_end():
                    #  The descriptor was never closed before end-of-input.
                    err = _cm_error.Error(
                        _ml_error.MOLECULE_PARENTHESIS_MISMATCH,
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.molecule.error.parenthesis_mismatch.description"
                        ),
                        options)
                    err.push_traceback(
                        expression,
                        e_start_pos,
                        token.get_position() - 1,
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.molecule.error.parenthesis_mismatch.right")
                    )
                else:
                    #  Raise an error if current working token is not an
                    #  electronic positivity flag.
                    err = _cm_error.Error(
                        _ml_error.MOLECULE_UNEXPECTED_TOKEN,
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.molecule.error.unexpected_token.description"
                        ),
                        options)
                    err.push_traceback(
                        expression,
                        token.get_position(),
                        token.get_position() + len(token.get_symbol()) - 1,
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.molecule.error.unexpected_token.electronic_suffix"
                        ))
                raise err

            #  Next token.
            cursor += 1
            token = token_list[cursor]

            #  Raise an error if current working token is not '>'.
            if not token.is_electronic_end():
                if token.is_end():
                    err = _cm_error.Error(
                        _ml_error.MOLECULE_PARENTHESIS_MISMATCH,
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.molecule.error.parenthesis_mismatch.description"
                        ),
                        options)
                    err.push_traceback(
                        expression,
                        e_start_pos,
                        token.get_position() - 1,
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.molecule.error.parenthesis_mismatch.right")
                    )
                else:
                    err = _cm_error.Error(
                        _ml_error.MOLECULE_UNEXPECTED_TOKEN,
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.molecule.error.unexpected_token.description"
                        ),
                        options)
                    err.push_traceback(
                        expression,
                        token.get_position(),
                        token.get_position() + len(token.get_symbol()) - 1,
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.molecule.error.unexpected_token.electronic_end"
                        ))
                raise err

            #  Next token.
            cursor += 1
            token = token_list[cursor]

            #  Raise an error if the electronic descriptor is not at the end
            #  of a molecule block.
            if not (token.is_right_parenthesis() or token.is_hydrate_dot()
                    or token.is_end() or token.is_status()):
                err = _cm_error.Error(
                    _ml_error.MOLECULE_UNEXPECTED_TOKEN,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.unexpected_token.description"),
                    options)
                err.push_traceback(
                    expression,
                    e_start_pos,
                    token.get_position() - 1,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.unexpected_token.electronic_misplaced"
                    ))
                raise err

            #  Set the electronic count.
            node.set_electronic_count(e_pfx)

            #  Go to root state.
            state = _STATE_ROOT
        elif state == _STATE_HYDRATE_DOT:
            #  Save the ending position of current working node.
            node.register_ending_position_in_source_text(token.get_position() - 1)

            #  Go to parent node (a hydrate dot separates sibling molecules
            #  inside a hydrate group).
            node = node.get_parent_node()
            assert isinstance(node, _ml_ast_base.ASTNodeHydrateGroup)

            #  Create a new molecule node and set its starting position.
            new_molecule = _ml_ast_base.ASTNodeMolecule(node)
            new_molecule.register_starting_position_in_source_text(
                token.get_position() + 1)

            #  Add the new created molecule node to the hydrate group node.
            node.append_child(new_molecule)

            #  Switch the node pointer to the new created molecule node.
            node = new_molecule

            #  Next token.
            cursor += 1

            #  Go to root state.
            state = _STATE_ROOT
        elif state == _STATE_PREFIX_NUMBER:
            #  Save the starting position of the prefix.
            pfx_start = token.get_position()

            #  Read prefix numbers (product of consecutive operands).
            has_pfx_number = False
            while token.is_operand():
                #  Mark the flag.
                has_pfx_number = True

                #  Process the prefix number.
                node.set_prefix_number(node.get_prefix_number() *
                                       token.get_operand_value().simplify())

                #  Next token.
                cursor += 1
                token = token_list[cursor]

            #  Simplify before checking.
            pfx = node.get_prefix_number().simplify()

            #  Domain check: the prefix must be positive.
            if pfx.is_negative or pfx.is_zero:
                err = _cm_error.Error(
                    _ml_error.MOLECULE_DOMAIN_ERROR,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.domain_error.description"),
                    options)
                err.push_traceback(
                    expression,
                    pfx_start,
                    token.get_position() - 1,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.domain_error.prefix"))
                raise err

            #  Validate: an explicitly written prefix of exactly 1 is
            #  redundant and rejected.
            if has_pfx_number and pfx == _math_cst.ONE:
                err = _cm_error.Error(
                    _ml_error.MOLECULE_EXCEED_OPERAND,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.exceed_operand.description"),
                    options)
                err.push_traceback(
                    expression,
                    pfx_start,
                    token.get_position() - 1,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.exceed_operand.prefix"))
                raise err

            #  Set the prefix number.
            node.set_prefix_number(pfx)

            #  Go to root state.
            state = _STATE_ROOT
        elif state == _STATE_SUFFIX_NUMBER:
            #  Save the starting position of the suffix.
            sfx_start = token.get_position()

            #  Read suffix numbers (product of consecutive operands).
            has_sfx_number = False
            while token.is_operand():
                #  Mark the flag.
                has_sfx_number = True

                #  Process the suffix number.
                node.set_suffix_number(node.get_suffix_number() *
                                       token.get_operand_value().simplify())

                #  Next token.
                cursor += 1
                token = token_list[cursor]

            #  Get the suffix.
            sfx = node.get_suffix_number()

            #  Simplify before checking.
            sfx = sfx.simplify()

            #  Domain check: the suffix must be positive.
            if sfx.is_negative or sfx.is_zero:
                err = _cm_error.Error(
                    _ml_error.MOLECULE_DOMAIN_ERROR,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.domain_error.description"),
                    options)
                err.push_traceback(
                    expression,
                    sfx_start,
                    token.get_position() - 1,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.domain_error.suffix"))
                raise err

            #  Validate: an explicitly written suffix of exactly 1 is
            #  redundant and rejected.
            if has_sfx_number and sfx == _math_cst.ONE:
                err = _cm_error.Error(
                    _ml_error.MOLECULE_EXCEED_OPERAND,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.exceed_operand.description"),
                    options)
                err.push_traceback(
                    expression,
                    sfx_start,
                    token.get_position() - 1,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.exceed_operand.suffix"))
                raise err

            #  Register the ending position of current working node.
            node.register_ending_position_in_source_text(token.get_position() - 1)

            #  Go to root state.
            state = _STATE_ROOT
        elif state == _STATE_MOLECULE_STATUS:
            #  Raise an error if the token is not at the end of the molecule.
            if not token_list[cursor + 1].is_end():
                #  NOTE(review): this reuses the 'electronic_misplaced' message
                #  key for a misplaced status descriptor — confirm a dedicated
                #  message key was not intended here.
                err = _cm_error.Error(
                    _ml_error.MOLECULE_UNEXPECTED_TOKEN,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.unexpected_token.description"),
                    options)
                err.push_traceback(
                    expression,
                    token.get_position(),
                    token.get_position() + len(token.get_symbol()) - 1,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.unexpected_token.electronic_misplaced"
                    ))
                raise err

            #  Fetch the molecule status.
            if token.is_gas_status():
                molecule_status = _ml_ast_base.STATUS_GAS
            elif token.is_liquid_status():
                molecule_status = _ml_ast_base.STATUS_LIQUID
            elif token.is_solid_status():
                molecule_status = _ml_ast_base.STATUS_SOLID
            elif token.is_aqueous_status():
                molecule_status = _ml_ast_base.STATUS_AQUEOUS
            else:
                raise RuntimeError("BUG: Unrecognized status.")

            #  Next token.
            cursor += 1

            #  Go to root state.
            state = _STATE_ROOT
        else:
            raise RuntimeError("BUG: Unrecognized state.")

    #  Get the ending position (character before the end token).
    ending_pos = token_list[-1].get_position() - 1

    #  Initialize the parenthesis-mismatched flag.
    mismatch_flag = False

    #  Pre-create an error.
    err = _cm_error.Error(
        _ml_error.MOLECULE_PARENTHESIS_MISMATCH,
        _l10n_reg.get_message(
            lang_id,
            "parser.molecule.error.parenthesis_mismatch.description"),
        options)

    #  Walk back up to the root, closing every node still open.
    while node is not None:
        #  Register the ending position of current working node.
        node.register_ending_position_in_source_text(ending_pos)

        #  Mark the error flag and add an error description if current node is
        #  a parenthesis node (it was never closed by a ')').
        if node.is_parenthesis():
            mismatch_flag = True
            err.push_traceback(
                expression,
                node.get_starting_position_in_source_text(),
                node.get_starting_position_in_source_text(),
                _l10n_reg.get_message(
                    lang_id,
                    "parser.molecule.error.parenthesis_mismatch.right"))

        #  Go to parent node.
        node = node.get_parent_node()

    #  Raise an error if we have met at least 1 parenthesis node.
    if mismatch_flag:
        raise err

    #  Now, we have constructed the whole AST, but we got a lot of useless
    #  hydrate group node.  So we have to remove them (all hydrate groups
    #  nodes which have only 1 child).
    #
    #  The BFS order visits children before parents, so each node's children
    #  are already unpacked (keyed by id()) when the node itself is processed.

    #  Get iterate order.
    unpack_order = _ml_ast_bfs.do_bfs(root, True)

    #  Initialize unpacked node container.
    unpacked = {}

    for node in unpack_order:
        if node.is_hydrate_group():
            assert isinstance(node, _ml_ast_base.ASTNodeHydrateGroup)
            if len(node) == 1:
                #  Get the child node and reset its parent.
                child = unpacked[id(node[0])]
                child.set_parent_node(node.get_parent_node())

                #  Save the unpack result (the group itself is dropped).
                unpacked[id(node)] = child
            else:
                #  Update children links.
                for child_id in range(0, len(node)):
                    node[child_id] = unpacked[id(node[child_id])]

                #  Save the unpack result.
                unpacked[id(node)] = node
        elif node.is_molecule():
            assert isinstance(node, _ml_ast_base.ASTNodeMolecule)

            #  Update children links.
            for child_id in range(0, len(node)):
                node[child_id] = unpacked[id(node[child_id])]

            #  Save the unpack result.
            unpacked[id(node)] = node
        elif node.is_parenthesis():
            assert isinstance(node, _ml_ast_base.ASTNodeParenthesisWrapper)

            #  Update children links.
            node.set_inner_node(unpacked[id(node.get_inner_node())])

            #  Save the unpack result.
            unpacked[id(node)] = node
        else:
            #  Save the unpack result.
            unpacked[id(node)] = node

    #  Set molecule status.
    root = unpacked[id(root)]
    """:type : bce.parser.ast.molecule.ASTNodeHydrateGroup | bce.parser.ast.molecule.ASTNodeMolecule"""
    root.set_status(molecule_status)

    return root
def parse_to_rpn(expression,
                 token_list,
                 options,
                 protected_header_enabled=False,
                 protected_header_prefix="X"):
    """Parse an infix math expression to RPN.

    Implements the shunting-yard algorithm with additional handling for
    implicit multiplication (e.g. ``4x``, ``(a)(b)``), function calls with
    argument counting, and mixed parenthesis styles ``()``, ``[]``, ``{}``.

    :type expression: str
    :type token_list: list[bce.parser.mexp.token.Token]
    :type options: bce.option.Option
    :type protected_header_enabled: bool
    :type protected_header_prefix: str
    :param expression: The infix math expression.
    :param token_list: The tokenized infix math expression.
    :param options: The options.
    :param protected_header_enabled: Whether the protected headers are enabled.
    :param protected_header_prefix: The prefix of the protected headers.
    :rtype : list[bce.parser.mexp.token.Token]
    :return: The RPN token list.
    :raise bce.parser.common.error.Error: Raise when a parser error occurred.
    """

    #  Initialize.
    lang_id = _l10n_opt.OptionWrapper(options).get_language_id()
    token_id = 0
    token_count = len(token_list)
    rpn = _RPNProcessor()
    #  Argument counting state for the function call being parsed (saved and
    #  restored through the parenthesis stack on nesting).
    current_argc = 0
    required_argc = 0
    prev_separator_position = -1
    #  Closing parenthesis character => its matching opening character.
    parenthesis_mapping = {")": "(", "]": "[", "}": "{"}
    parenthesis_stack = _adt_stack.Stack()
    in_function = False

    while token_id < token_count:
        #  Get current token.
        token = token_list[token_id]

        #  Get previous token.
        if token_id != 0:
            prev_tok = token_list[token_id - 1]
        else:
            prev_tok = None

        if token.is_operand():
            if token.is_symbol_operand():
                #  Check the protected header.
                if protected_header_enabled and token.get_symbol().startswith(
                        protected_header_prefix):
                    err = _cm_error.Error(
                        _mexp_errors.MEXP_USE_PROTECTED_HEADER,
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.mexp.error.protected_header.description"),
                        options)
                    err.push_traceback(
                        expression,
                        token.get_position(),
                        token.get_position() + len(token.get_symbol()) - 1,
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.mexp.error.protected_header.message"),
                        replace_map={"$1": protected_header_prefix})
                    raise err

            if prev_tok is not None:
                if prev_tok.is_right_parenthesis():
                    if token.is_symbol_operand():
                        #  Do completion:
                        #    ([expr])[unknown] => ([expr])*[unknown]
                        #
                        #  For example:
                        #    (3-y)x => (3-y)*x
                        rpn.add_operator(
                            _mexp_token.create_multiply_operator_token())
                    else:
                        #  Numeric parenthesis suffix was not supported.
                        #
                        #  For example:
                        #    (x-y)3
                        #          ^
                        #          Requires a '*' before this token.
                        err = _cm_error.Error(
                            _mexp_errors.MEXP_MISSING_OPERATOR,
                            _l10n_reg.get_message(
                                lang_id,
                                "parser.mexp.error.missing_operator.description"
                            ),
                            options)
                        err.push_traceback(
                            expression,
                            token.get_position(),
                            token.get_position() + len(token.get_symbol()) - 1,
                            _l10n_reg.get_message(
                                lang_id,
                                "parser.mexp.error.missing_operator.multiply_before"
                            ))
                        raise err

                if prev_tok.is_operand():
                    #  Do completion:
                    #    [number][symbol] => [number]*[symbol]
                    #
                    #  For example:
                    #    4x => 4*x
                    rpn.add_operator(
                        _mexp_token.create_multiply_operator_token())

            #  Process the token.
            rpn.add_operand(token)

            #  Go to next token.
            token_id += 1
            continue
        elif token.is_function():
            #  Raise an error if the function is unsupported.
            if _mexp_functions.find_function(token.get_symbol()) is None:
                err = _cm_error.Error(
                    _mexp_errors.MEXP_FUNCTION_UNSUPPORTED,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.mexp.error.unsupported_function.description"),
                    options)
                err.push_traceback(
                    expression,
                    token.get_position(),
                    token.get_position() + len(token.get_symbol()) - 1,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.mexp.error.unsupported_function.message"),
                    replace_map={"$1": token.get_symbol()})
                raise err

            if prev_tok is not None and (prev_tok.is_operand() or
                                         prev_tok.is_right_parenthesis()):
                #  Do completion:
                #    [num][fn] => [num]*[fn]
                #
                #  For example:
                #    4pow(2,3) => 4*pow(2,3)
                rpn.add_operator(_mexp_token.create_multiply_operator_token())

            #  Process the token.
            rpn.add_function(token)

            #  Go to next token.
            token_id += 1
            continue
        elif token.is_operator():
            #  Get the operator.
            op = _mexp_operators.OPERATORS[token.get_subtype()]

            #  Check operands.
            if op.is_required_left_operand():
                _check_left_operand(expression, token_list, token_id, options)
            if op.is_required_right_operand():
                _check_right_operand(expression, token_list, token_id, options)

            #  Process the token.
            rpn.add_operator(token)

            #  Go to next token.
            token_id += 1
            continue
        elif token.is_left_parenthesis():
            #  Save state (restored when the matching ')' is reached).
            parenthesis_stack.push(
                _ParenthesisStackItem(token.get_symbol(), token_id,
                                      in_function, current_argc,
                                      required_argc, prev_separator_position))
            current_argc = 0
            prev_separator_position = token_id

            #  Set function state and get required argument count.
            if prev_tok is not None and prev_tok.is_function():
                #  Mark the flag.
                in_function = True

                #  Get the function object (validated when the function token
                #  was processed, so it must exist).
                fn_object = _mexp_functions.find_function(
                    prev_tok.get_symbol())
                if fn_object is None:
                    raise RuntimeError("BUG: Function object is None.")

                #  Get the required argument count.
                required_argc = fn_object.get_argument_count()
            else:
                #  Clear the flag.
                in_function = False
                required_argc = 0

            if prev_tok is not None and (prev_tok.is_right_parenthesis() or
                                         prev_tok.is_operand()):
                #  Do completion
                #    [lp][expr][rp][lp][expr][rp] => [lp][expr][rp]*[lp][expr][rp]
                #
                #  For example:
                #    (2+3)(4+2) => (2+3)*(4+2)
                rpn.add_operator(_mexp_token.create_multiply_operator_token())

            #  Process the token.
            rpn.add_left_parenthesis(token)

            #  Go to next token.
            token_id += 1
            continue
        elif token.is_right_parenthesis():
            #  Raise an error if there's no content between two separators.
            #
            #  NOTE(review): if the very first token is a right parenthesis,
            #  prev_separator_position (-1) + 1 == token_id (0) holds and
            #  prev_tok is None here, so this would raise AttributeError
            #  instead of a parser error — confirm an earlier stage rejects
            #  that input.
            if prev_separator_position + 1 == token_id:
                err = _cm_error.Error(
                    _mexp_errors.MEXP_NO_CONTENT,
                    _l10n_reg.get_message(
                        lang_id, "parser.mexp.error.no_content.description"),
                    options)
                if prev_tok.is_left_parenthesis():
                    err.push_traceback(
                        expression, prev_tok.get_position(),
                        token.get_position(),
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.mexp.error.no_content.in_parentheses"))
                else:
                    err.push_traceback(
                        expression, prev_tok.get_position(),
                        token.get_position(),
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.mexp.error.no_content.in_argument"))
                raise err

            #  Raise an error if there's no left parenthesis to be matched
            #  with.
            if len(parenthesis_stack) == 0:
                err = _cm_error.Error(
                    _mexp_errors.MEXP_PARENTHESIS_MISMATCH,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.mexp.error.parenthesis_mismatch.description"),
                    options)
                err.push_traceback(
                    expression, token.get_position(), token.get_position(),
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.mexp.error.parenthesis_mismatch.left"))
                raise err

            #  Get the top item of the stack.
            p_item = parenthesis_stack.pop()

            #  Get the symbol of the parenthesis matches with current token.
            p_matched_sym = parenthesis_mapping[token.get_symbol()]

            #  Raise an error if the parenthesis was mismatched (e.g. '[' was
            #  closed by ')').
            if p_matched_sym != p_item.get_symbol():
                err = _cm_error.Error(
                    _mexp_errors.MEXP_PARENTHESIS_MISMATCH,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.mexp.error.parenthesis_mismatch.description"),
                    options)
                err.push_traceback(
                    expression, token.get_position(), token.get_position(),
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.mexp.error.parenthesis_mismatch.incorrect"),
                    replace_map={"$1": p_matched_sym})
                raise err

            if in_function:
                #  The closing parenthesis terminates the last argument.
                current_argc += 1

                #  Raise an error if the argument count was not matched.
                if current_argc != required_argc:
                    #  The function token sits right before the saved '('.
                    fn_token = token_list[p_item.get_token_id() - 1]
                    err = _cm_error.Error(
                        _mexp_errors.MEXP_FUNCTION_ARGUMENT_COUNT_MISMATCH,
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.mexp.error.argument_count_mismatch.description"
                        ),
                        options)
                    err.push_traceback(
                        expression, fn_token.get_position(),
                        fn_token.get_position() + len(fn_token.get_symbol()) - 1,
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.mexp.error.argument_count_mismatch.message"
                        ), {
                            "$1": str(required_argc),
                            "$2": str(current_argc)
                        })
                    raise err

            #  Restore state.
            in_function = p_item.is_in_function()
            current_argc = p_item.get_current_argument_count()
            required_argc = p_item.get_required_argument_count()
            prev_separator_position = p_item.get_previous_separator_position()

            #  Process the token.
            rpn.add_right_parenthesis()

            #  Go to next token.
            token_id += 1
            continue
        elif token.is_separator():
            #  Raise an error if we're not in function now.
            if not in_function:
                err = _cm_error.Error(
                    _mexp_errors.MEXP_ILLEGAL_ARGUMENT_SEPARATOR,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.mexp.error.illegal_separator.description"),
                    options)
                err.push_traceback(
                    expression, token.get_position(), token.get_position(),
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.mexp.error.illegal_separator.message"))
                raise err

            #  Raise an error if there's no content between two separators.
            if prev_separator_position + 1 == token_id:
                err = _cm_error.Error(
                    _mexp_errors.MEXP_NO_CONTENT,
                    _l10n_reg.get_message(
                        lang_id, "parser.mexp.error.no_content.description"),
                    options)
                err.push_traceback(
                    expression, prev_tok.get_position(), token.get_position(),
                    _l10n_reg.get_message(
                        lang_id, "parser.mexp.error.no_content.in_argument"))
                raise err

            #  Save separator position.
            prev_separator_position = token_id

            #  Increase argument counter.
            current_argc += 1

            #  Process the token.
            rpn.add_separator()

            #  Go to next token.
            token_id += 1
            continue
        else:
            raise RuntimeError("Never reach this condition.")

    #  Raise an error if there are still some left parentheses in the stack.
    if len(parenthesis_stack) != 0:
        err = _cm_error.Error(
            _mexp_errors.MEXP_PARENTHESIS_MISMATCH,
            _l10n_reg.get_message(
                lang_id,
                "parser.mexp.error.parenthesis_mismatch.description"),
            options)
        while len(parenthesis_stack) != 0:
            p_item = parenthesis_stack.pop()
            p_token = token_list[p_item.get_token_id()]
            err.push_traceback(
                expression, p_token.get_position(), p_token.get_position(),
                _l10n_reg.get_message(
                    lang_id,
                    "parser.mexp.error.parenthesis_mismatch.right"))
        raise err

    #  Pop all items off from the stack and push them onto the RPN token list.
    rpn.finalize()

    #  Return the RPN token list.
    return rpn.get_rpn()