def parse_ast(expression, root_node, options,
              mexp_protected_header_enabled=False,
              mexp_protected_header_prefix="X"):
    """Evaluate a molecule AST into its atoms dictionary.

    Nodes are visited in the order produced by ``_ml_ast_bfs.do_bfs(root_node, True)``
    so that the result of every child is available before its parent is handled.
    Each node gets its own ``MergeUtil``; the one that belongs to the root node
    provides the returned data.

    :type expression: str
    :type root_node: bce.parser.ast.molecule.ASTNodeHydrateGroup | bce.parser.ast.molecule.ASTNodeMolecule
    :type options: bce.option.Option
    :type mexp_protected_header_enabled: bool
    :type mexp_protected_header_prefix: str
    :param expression: The origin expression.
    :param root_node: The root node of the AST.
    :param options: The options.
    :param mexp_protected_header_enabled: Whether the MEXP protected headers are enabled.
    :param mexp_protected_header_prefix: The prefix of the MEXP protected headers.
    :rtype : dict
    :return: The parsed atoms dictionary.
    :raise _cm_error.Error: If a molecule, a parenthesis, an abbreviation or the whole
        expression has no content, if an abbreviation is not in the abbreviation mapping,
        or if expanding an abbreviation fails (the error is re-raised with extra tracebacks).
    """

    #  Wrap the options (interface / molecule / localization).
    if_opt = _interface_opt.OptionWrapper(options)
    molecule_opt = _ml_opt.OptionWrapper(options)
    lang_id = _l10n_opt.OptionWrapper(options).get_language_id()

    def _message(key):
        #  Look a localized message up in the current language.
        return _l10n_reg.get_message(lang_id, key)

    def _no_content_error():
        #  Build a fresh "no content" error (without any traceback).
        return _cm_error.Error(
            _ml_error.MOLECULE_NO_CONTENT,
            _message("parser.molecule.error.no_content.description"),
            options
        )

    #  Per-node results, keyed by id(node). Values are MergeUtil instances.
    results = {}

    for current in _ml_ast_bfs.do_bfs(root_node, True):
        if current.is_hydrate_group() or current.is_molecule():
            assert isinstance(current, _ast_base.ASTNodeHydrateGroup) or \
                isinstance(current, _ast_base.ASTNodeMolecule)

            #  The prefix number multiplies everything inside the node.
            multiplier = current.get_prefix_number()
            merged = MergeUtil()

            #  Only molecules carry an electronic count.
            if current.is_molecule():
                charge = current.get_electronic_count().simplify()
                if not charge.is_zero:
                    merged.add("e", charge * multiplier)

            for index in range(0, len(current)):
                child = current[index]
                child_result = results[id(child)]

                #  A hydrate group must not contain an empty molecule.
                if current.is_hydrate_group() and len(child_result) == 0:
                    assert isinstance(child, _ast_base.ASTNodeMolecule)
                    err = _no_content_error()
                    if index == 0:
                        #  Empty first molecule: point just behind it.
                        anchor = child.get_ending_position_in_source_text() + 1
                        first, last = anchor, anchor
                        key = "parser.molecule.error.no_content.before"
                    elif index == len(current) - 1:
                        #  Empty last molecule: point just in front of it.
                        anchor = child.get_starting_position_in_source_text() - 1
                        first, last = anchor, anchor
                        key = "parser.molecule.error.no_content.after"
                    else:
                        #  Empty molecule in the middle: span both neighbours.
                        first = child.get_starting_position_in_source_text() - 1
                        last = child.get_ending_position_in_source_text() + 1
                        key = "parser.molecule.error.no_content.inside"
                    err.push_traceback(expression, first, last, _message(key))
                    raise err

                merged.merge(child_result, multiplier)

            _macro_simplify(expression, merged, current, options)
            results[id(current)] = merged
            continue

        if current.is_atom():
            assert isinstance(current, _ast_base.ASTNodeAtom)

            #  An atom contributes its symbol, weighted by its suffix number.
            multiplier = current.get_suffix_number()
            merged = MergeUtil()
            merged.add(current.get_atom_symbol(), multiplier)
            results[id(current)] = merged
            continue

        if current.is_parenthesis():
            assert isinstance(current, _ast_base.ASTNodeParenthesisWrapper)

            multiplier = current.get_suffix_number()
            merged = MergeUtil()
            inner_result = results[id(current.get_inner_node())]

            #  "()" with nothing inside is an error.
            if len(inner_result) == 0:
                err = _no_content_error()
                err.push_traceback(
                    expression,
                    current.get_starting_position_in_source_text(),
                    current.get_ending_position_in_source_text(),
                    _message("parser.molecule.error.no_content.inside")
                )
                raise err

            merged.merge(inner_result, multiplier)
            _macro_simplify(expression, merged, current, options)
            results[id(current)] = merged
            continue

        if current.is_abbreviation():
            assert isinstance(current, _ast_base.ASTNodeAbbreviation)

            symbol = current.get_abbreviation_symbol()

            #  An abbreviation with an empty symbol is an error.
            if len(symbol) == 0:
                err = _no_content_error()
                err.push_traceback(
                    expression,
                    current.get_starting_position_in_source_text(),
                    current.get_ending_position_in_source_text(),
                    _message("parser.molecule.error.no_content.inside")
                )
                raise err

            #  The symbol has to be present in the abbreviation mapping.
            mapping = molecule_opt.get_abbreviation_mapping()
            if symbol not in mapping:
                err = _cm_error.Error(
                    _ml_error.MOLECULE_UNSUPPORTED_ABBREVIATION,
                    _message("parser.molecule.error.unsupported_abbreviation.description"),
                    options
                )
                err.push_traceback(
                    expression,
                    current.get_starting_position_in_source_text() + 1,
                    current.get_ending_position_in_source_text() - 1,
                    _message("parser.molecule.error.unsupported_abbreviation.message")
                )
                raise err

            #  Expand the abbreviation by parsing the expression it maps to.
            abbr_expression = mapping[symbol]
            try:
                abbr_parser = if_opt.get_molecule_parser()
                abbr_ast_root = abbr_parser.parse_expression(
                    abbr_expression,
                    options,
                    mexp_protected_header_enabled=mexp_protected_header_enabled,
                    mexp_protected_header_prefix=mexp_protected_header_prefix
                )
                abbr_resolved = abbr_parser.parse_ast(
                    abbr_expression,
                    abbr_ast_root,
                    options,
                    mexp_protected_header_enabled=mexp_protected_header_enabled,
                    mexp_protected_header_prefix=mexp_protected_header_prefix
                )
            except _cm_error.Error as err:
                #  Attach where the failure comes from: the expanded text first,
                #  then the abbreviation inside the origin expression.
                err.push_traceback(
                    abbr_expression,
                    0,
                    len(abbr_expression) - 1,
                    _message("parser.molecule.error.parsing_abbreviation.expand")
                )
                err.push_traceback(
                    expression,
                    current.get_starting_position_in_source_text() + 1,
                    current.get_ending_position_in_source_text() - 1,
                    _message("parser.molecule.error.parsing_abbreviation.origin")
                )
                raise err

            #  Scale every resolved atom by the suffix number of the abbreviation.
            merged = MergeUtil()
            multiplier = current.get_suffix_number()
            for atom_symbol in abbr_resolved:
                merged.add(atom_symbol, abbr_resolved[atom_symbol] * multiplier)

            _macro_simplify(expression, merged, current, options)
            results[id(current)] = merged
            continue

        raise RuntimeError("Never reach this condition.")

    #  The whole expression must contain something.
    root_result = results[id(root_node)]
    if len(root_result) == 0:
        err = _no_content_error()
        err.push_traceback(
            expression,
            0,
            len(expression) - 1,
            _message("parser.molecule.error.no_content.inside")
        )
        raise err

    return root_result.get_data()
def print_ast(root_node, mexp_parser):
    """Render an AST as a text expression.

    Every node is rendered after its children (the order comes from
    ``_ml_ast_bfs.do_bfs(root_node, True)``); the text of the root node, followed by
    the status suffix of the root node (if any), is the result.

    :type root_node: bce.parser.ast.molecule.ASTNodeHydrateGroup | bce.parser.ast.molecule.ASTNodeMolecule
    :type mexp_parser: bce.parser.interface.mexp_parser.MathExpressionParserInterface
    :param root_node: The root node of the AST.
    :param mexp_parser: The math expression parser.
    :rtype : str
    :return: The printed expression.
    """

    #  Rendered text of each node, keyed by id(node).
    rendered = {}

    for current in _ml_ast_bfs.do_bfs(root_node, True):
        if current.is_hydrate_group():
            assert isinstance(current, _ml_ast_base.ASTNodeHydrateGroup)

            #  A prefix other than one wraps the group in parentheses.
            prefix = current.get_prefix_number().simplify()
            if prefix != _math_cst.ONE:
                template = _print_operand(prefix, mexp_parser) + "(%s)"
            else:
                template = "%s"

            #  Children are separated with dots.
            pieces = [rendered[id(current[0])]]
            pieces.extend(
                rendered[id(current[index])]
                for index in range(1, len(current))
            )
            rendered[id(current)] = template % ".".join(pieces)
        elif current.is_molecule():
            assert isinstance(current, _ml_ast_base.ASTNodeMolecule)

            #  Prefix number, then the children, then the electronic part.
            prefix = current.get_prefix_number().simplify()
            text = _print_operand(prefix, mexp_parser)
            text += "".join(
                rendered[id(current[index])]
                for index in range(0, len(current))
            )

            charge = current.get_electronic_count().simplify()
            if not charge.is_zero:
                text += _print_electronic(charge, mexp_parser)

            rendered[id(current)] = text
        elif current.is_atom():
            assert isinstance(current, _ml_ast_base.ASTNodeAtom)

            symbol = current.get_atom_symbol()
            rendered[id(current)] = symbol + _print_suffix(current, mexp_parser)
        elif current.is_parenthesis():
            assert isinstance(current, _ml_ast_base.ASTNodeParenthesisWrapper)

            inner_text = rendered[id(current.get_inner_node())]
            suffix_text = _print_suffix(current, mexp_parser)
            rendered[id(current)] = "(%s)%s" % (inner_text, suffix_text)
        elif current.is_abbreviation():
            assert isinstance(current, _ml_ast_base.ASTNodeAbbreviation)

            abbr_symbol = current.get_abbreviation_symbol()
            suffix_text = _print_suffix(current, mexp_parser)
            rendered[id(current)] = "[%s]%s" % (abbr_symbol, suffix_text)
        else:
            raise RuntimeError("BUG: Unhandled AST node type.")

    #  Post process - append the status of the root node (if it has one).
    result = rendered[id(root_node)]
    if root_node.is_gas_status():
        return result + "(g)"
    if root_node.is_liquid_status():
        return result + "(l)"
    if root_node.is_solid_status():
        return result + "(s)"
    if root_node.is_aqueous_status():
        return result + "(aq)"
    return result
def generate_ast(expression, token_list, options):
    """Generate an AST from the token list.

    The tokens are consumed by a small state machine. The machine always comes back
    to _STATE_ROOT, which looks at the current token (without consuming it) and picks
    the state that handles it. The loop ends when the root state sees the end token.
    After the loop, unclosed parentheses are reported and hydrate groups which have
    only one child are replaced by that child.

    :type expression: str
    :type token_list: list[bce.parser.molecule.token.Token]
    :type options: bce.option.Option
    :param expression: The origin expression.
    :param token_list: The token list.
    :param options: The options.
    :rtype : bce.parser.ast.molecule.ASTNodeHydrateGroup | bce.parser.ast.molecule.ASTNodeMolecule
    :return: The root node of the generated AST.
    :raise _cm_error.Error: With one of the codes MOLECULE_UNEXPECTED_TOKEN,
        MOLECULE_PARENTHESIS_MISMATCH, MOLECULE_DOMAIN_ERROR or MOLECULE_EXCEED_OPERAND
        when the token sequence is not acceptable.
    :raise RuntimeError: On internal inconsistencies (messages start with "BUG:").
    """

    # Get the language ID.
    lang_id = _l10n_opt.OptionWrapper(options).get_language_id()

    # Initialize the molecule status container (stays None if no status token is met).
    molecule_status = None

    # Initialize the state machine.
    state = _STATE_ROOT

    # Generate initial AST (a hydrate group which contains one molecule).
    root = _ml_ast_base.ASTNodeHydrateGroup()
    node = _ml_ast_base.ASTNodeMolecule(root)
    root.append_child(node)

    # Register the starting position.
    root.register_starting_position_in_source_text(0)
    node.register_starting_position_in_source_text(0)

    # Initialize the token cursor.
    cursor = 0

    while True:
        # Get current token.
        token = token_list[cursor]

        if state == _STATE_ROOT:
            # Find molecule in parent nodes and current node.
            while node is not None and not node.is_molecule():
                node = node.get_parent_node()
            if node is None:
                raise RuntimeError("BUG: Can't find molecule group.")

            # Redirect by rules. The token is not consumed here, the selected state
            # handles it in the next iteration.
            if token.is_operand() and len(node) == 0:
                # An operand is only accepted as a prefix, i.e. when the molecule has no child yet.
                state = _STATE_PREFIX_NUMBER
            elif token.is_symbol():
                state = _STATE_ATOM
            elif token.is_abbreviation():
                state = _STATE_ABBREVIATION
            elif token.is_left_parenthesis():
                state = _STATE_LEFT_PARENTHESIS
            elif token.is_right_parenthesis():
                state = _STATE_RIGHT_PARENTHESIS
            elif token.is_electronic_begin():
                state = _STATE_ELECTRONIC
            elif token.is_hydrate_dot():
                state = _STATE_HYDRATE_DOT
            elif token.is_status():
                state = _STATE_MOLECULE_STATUS
            elif token.is_end():
                break
            else:
                # Raise an error if the token can't be recognized.
                err = _cm_error.Error(
                    _ml_error.MOLECULE_UNEXPECTED_TOKEN,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.unexpected_token.description"),
                    options)
                err.push_traceback(
                    expression,
                    token.get_position(),
                    token.get_position() + len(token.get_symbol()) - 1,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.unexpected_token.other"))
                raise err
        elif state == _STATE_ATOM:
            # Create a new atom node and register its starting position.
            new_node = _ml_ast_base.ASTNodeAtom(token.get_symbol(), node)
            new_node.register_starting_position_in_source_text(
                token.get_position())

            # Add the node to the molecule group.
            node.append_child(new_node)

            # Switch the node pointer to the new created node.
            node = new_node

            # Next token.
            cursor += 1

            # Go to read the suffix number.
            state = _STATE_SUFFIX_NUMBER
        elif state == _STATE_ABBREVIATION:
            # Create a new abbreviation node and register its starting position.
            # The first and the last character of the token symbol are dropped
            # (presumably the surrounding brackets - confirm against the tokenizer).
            new_node = _ml_ast_base.ASTNodeAbbreviation(
                token.get_symbol()[1:-1], node)
            new_node.register_starting_position_in_source_text(
                token.get_position())

            # Add the node to the molecule group.
            node.append_child(new_node)

            # Switch the node pointer to the new created node.
            node = new_node

            # Next token.
            cursor += 1

            # Go to read the suffix number.
            state = _STATE_SUFFIX_NUMBER
        elif state == _STATE_LEFT_PARENTHESIS:
            # Create new nodes (parenthesis wrapper -> hydrate group -> molecule).
            new_hydrate_grp = _ml_ast_base.ASTNodeHydrateGroup()
            new_molecule = _ml_ast_base.ASTNodeMolecule(new_hydrate_grp)
            new_parenthesis = _ml_ast_base.ASTNodeParenthesisWrapper(
                new_hydrate_grp, node)

            # Link them correctly and then add the new created parenthesis node to the molecule group.
            new_hydrate_grp.set_parent_node(new_parenthesis)
            new_hydrate_grp.append_child(new_molecule)
            node.append_child(new_parenthesis)

            # Switch the node pointer to the new created molecule node.
            node = new_molecule

            # Register their starting positions (the inner nodes start right after the '(').
            new_hydrate_grp.register_starting_position_in_source_text(
                token.get_position() + 1)
            new_molecule.register_starting_position_in_source_text(
                token.get_position() + 1)
            new_parenthesis.register_starting_position_in_source_text(
                token.get_position())

            # Next token.
            cursor += 1

            # Go to root state.
            state = _STATE_ROOT
        elif state == _STATE_RIGHT_PARENTHESIS:
            # Find parenthesis node in parent nodes and current node.
            while node is not None and not node.is_parenthesis():
                # Register the ending position of current working node.
                node.register_ending_position_in_source_text(
                    token.get_position() - 1)

                # Go to the parent node.
                node = node.get_parent_node()

            # Raise an error if the node can't be found (a ')' without a matching '(').
            if node is None:
                err = _cm_error.Error(
                    _ml_error.MOLECULE_PARENTHESIS_MISMATCH,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.parenthesis_mismatch.description"
                    ),
                    options)
                err.push_traceback(
                    expression,
                    token.get_position(),
                    token.get_position() + len(token.get_symbol()) - 1,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.parenthesis_mismatch.left"))
                raise err

            # Save the position of the right parenthesis to the parenthesis node.
            node.set_right_parenthesis_position(token.get_position())

            # Next token.
            cursor += 1

            # Go to read the suffix number (the node pointer is the parenthesis node now).
            state = _STATE_SUFFIX_NUMBER
        elif state == _STATE_ELECTRONIC:
            # Save the starting position of the electronic descriptor.
            e_start_pos = token.get_position()

            # Next token.
            cursor += 1
            token = token_list[cursor]

            # Try to read the prefix number (consecutive operands are multiplied together).
            e_pfx = _math_cst.ONE
            e_pfx_start = token.get_position()
            has_e_pfx_number = False
            while token.is_operand():
                # Mark the flag.
                has_e_pfx_number = True

                # Process the prefix number.
                e_pfx *= token.get_operand_value().simplify()

                # Next token.
                cursor += 1
                token = token_list[cursor]

            # Simplify before checking.
            e_pfx = e_pfx.simplify()

            # Domain check (negative and zero values are rejected).
            if e_pfx.is_negative or e_pfx.is_zero:
                err = _cm_error.Error(
                    _ml_error.MOLECULE_DOMAIN_ERROR,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.domain_error.description"),
                    options)
                err.push_traceback(
                    expression,
                    e_pfx_start,
                    token.get_position() - 1,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.domain_error.electronic_charge")
                )
                raise err

            # Validate (an explicitly written prefix which equals to one is rejected).
            if has_e_pfx_number and e_pfx == _math_cst.ONE:
                err = _cm_error.Error(
                    _ml_error.MOLECULE_EXCEED_OPERAND,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.exceed_operand.description"),
                    options)
                err.push_traceback(
                    expression,
                    e_pfx_start,
                    token.get_position() - 1,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.exceed_operand.electronic_charge"
                    ))
                raise err

            # Process the electronic positivity flag.
            if token.is_electronic_positive_flag():
                pass
            elif token.is_electronic_negative_flag():
                e_pfx = -e_pfx
            else:
                if token.is_end():
                    # The expression ended inside the descriptor, report it as a mismatch.
                    err = _cm_error.Error(
                        _ml_error.MOLECULE_PARENTHESIS_MISMATCH,
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.molecule.error.parenthesis_mismatch.description"
                        ),
                        options)
                    err.push_traceback(
                        expression,
                        e_start_pos,
                        token.get_position() - 1,
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.molecule.error.parenthesis_mismatch.right")
                    )
                else:
                    # Raise an error if current working token is not an electronic positivity flag.
                    err = _cm_error.Error(
                        _ml_error.MOLECULE_UNEXPECTED_TOKEN,
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.molecule.error.unexpected_token.description"
                        ),
                        options)
                    err.push_traceback(
                        expression,
                        token.get_position(),
                        token.get_position() + len(token.get_symbol()) - 1,
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.molecule.error.unexpected_token.electronic_suffix"
                        ))
                raise err

            # Next token.
            cursor += 1
            token = token_list[cursor]

            # Raise an error if current working token is not '>'.
            if not token.is_electronic_end():
                if token.is_end():
                    err = _cm_error.Error(
                        _ml_error.MOLECULE_PARENTHESIS_MISMATCH,
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.molecule.error.parenthesis_mismatch.description"
                        ),
                        options)
                    err.push_traceback(
                        expression,
                        e_start_pos,
                        token.get_position() - 1,
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.molecule.error.parenthesis_mismatch.right")
                    )
                else:
                    err = _cm_error.Error(
                        _ml_error.MOLECULE_UNEXPECTED_TOKEN,
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.molecule.error.unexpected_token.description"
                        ),
                        options)
                    err.push_traceback(
                        expression,
                        token.get_position(),
                        token.get_position() + len(token.get_symbol()) - 1,
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.molecule.error.unexpected_token.electronic_end"
                        ))
                raise err

            # Next token.
            cursor += 1
            token = token_list[cursor]

            # Raise an error if the electronic descriptor is not at the end of a molecule block.
            if not (token.is_right_parenthesis() or token.is_hydrate_dot() or
                    token.is_end() or token.is_status()):
                err = _cm_error.Error(
                    _ml_error.MOLECULE_UNEXPECTED_TOKEN,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.unexpected_token.description"),
                    options)
                err.push_traceback(
                    expression,
                    e_start_pos,
                    token.get_position() - 1,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.unexpected_token.electronic_misplaced"
                    ))
                raise err

            # Set the electronic count (the node pointer is a molecule node here because
            # this state is only entered from the root state).
            node.set_electronic_count(e_pfx)

            # Go to root state.
            state = _STATE_ROOT
        elif state == _STATE_HYDRATE_DOT:
            # Save the ending position of current working node.
            node.register_ending_position_in_source_text(token.get_position() - 1)

            # Go to parent node.
            node = node.get_parent_node()
            assert isinstance(node, _ml_ast_base.ASTNodeHydrateGroup)

            # Create a new molecule node and set its starting position.
            new_molecule = _ml_ast_base.ASTNodeMolecule(node)
            new_molecule.register_starting_position_in_source_text(
                token.get_position() + 1)

            # Add the new created molecule node to the hydrate group node.
            node.append_child(new_molecule)

            # Switch the node pointer to the new created molecule node.
            node = new_molecule

            # Next token.
            cursor += 1

            # Go to root state.
            state = _STATE_ROOT
        elif state == _STATE_PREFIX_NUMBER:
            # Save the starting position of the prefix.
            pfx_start = token.get_position()

            # Read prefix numbers (consecutive operands are multiplied together).
            has_pfx_number = False
            while token.is_operand():
                # Mark the flag.
                has_pfx_number = True

                # Process the prefix number.
                node.set_prefix_number(node.get_prefix_number() *
                                       token.get_operand_value().simplify())

                # Next token.
                cursor += 1
                token = token_list[cursor]

            # Simplify before checking.
            pfx = node.get_prefix_number().simplify()

            # Domain check (negative and zero values are rejected).
            if pfx.is_negative or pfx.is_zero:
                err = _cm_error.Error(
                    _ml_error.MOLECULE_DOMAIN_ERROR,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.domain_error.description"),
                    options)
                err.push_traceback(
                    expression,
                    pfx_start,
                    token.get_position() - 1,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.domain_error.prefix"))
                raise err

            # Validate (an explicitly written prefix which equals to one is rejected).
            if has_pfx_number and pfx == _math_cst.ONE:
                err = _cm_error.Error(
                    _ml_error.MOLECULE_EXCEED_OPERAND,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.exceed_operand.description"),
                    options)
                err.push_traceback(
                    expression,
                    pfx_start,
                    token.get_position() - 1,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.exceed_operand.prefix"))
                raise err

            # Set the prefix number (the simplified one).
            node.set_prefix_number(pfx)

            # Go to root state.
            state = _STATE_ROOT
        elif state == _STATE_SUFFIX_NUMBER:
            # Save the starting position of the suffix.
            sfx_start = token.get_position()

            # Read suffix numbers (consecutive operands are multiplied together).
            has_sfx_number = False
            while token.is_operand():
                # Mark the flag.
                has_sfx_number = True

                # Process the suffix number.
                node.set_suffix_number(node.get_suffix_number() *
                                       token.get_operand_value().simplify())

                # Next token.
                cursor += 1
                token = token_list[cursor]

            # Get the suffix.
            sfx = node.get_suffix_number()

            # Simplify before checking. Unlike the prefix state, the simplified value
            # is only used for the checks and is not written back to the node.
            sfx = sfx.simplify()

            # Domain check (negative and zero values are rejected).
            if sfx.is_negative or sfx.is_zero:
                err = _cm_error.Error(
                    _ml_error.MOLECULE_DOMAIN_ERROR,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.domain_error.description"),
                    options)
                err.push_traceback(
                    expression,
                    sfx_start,
                    token.get_position() - 1,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.domain_error.suffix"))
                raise err

            # Validate (an explicitly written suffix which equals to one is rejected).
            if has_sfx_number and sfx == _math_cst.ONE:
                err = _cm_error.Error(
                    _ml_error.MOLECULE_EXCEED_OPERAND,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.exceed_operand.description"),
                    options)
                err.push_traceback(
                    expression,
                    sfx_start,
                    token.get_position() - 1,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.exceed_operand.suffix"))
                raise err

            # Register the ending position of current working node.
            node.register_ending_position_in_source_text(token.get_position() - 1)

            # Go to root state.
            state = _STATE_ROOT
        elif state == _STATE_MOLECULE_STATUS:
            # Raise an error if the token is not at the end of the molecule.
            # NOTE(review): The lookahead assumes that there is always a token after the
            # status token (presumably the end token) - confirm against the tokenizer.
            # NOTE(review): The message key below is "electronic_misplaced" although the
            # misplaced token is a status token - confirm whether this is intended.
            if not token_list[cursor + 1].is_end():
                err = _cm_error.Error(
                    _ml_error.MOLECULE_UNEXPECTED_TOKEN,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.unexpected_token.description"),
                    options)
                err.push_traceback(
                    expression,
                    token.get_position(),
                    token.get_position() + len(token.get_symbol()) - 1,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.unexpected_token.electronic_misplaced"
                    ))
                raise err

            # Fetch the molecule status.
            if token.is_gas_status():
                molecule_status = _ml_ast_base.STATUS_GAS
            elif token.is_liquid_status():
                molecule_status = _ml_ast_base.STATUS_LIQUID
            elif token.is_solid_status():
                molecule_status = _ml_ast_base.STATUS_SOLID
            elif token.is_aqueous_status():
                molecule_status = _ml_ast_base.STATUS_AQUEOUS
            else:
                raise RuntimeError("BUG: Unrecognized status.")

            # Next token.
            cursor += 1

            # Go to root state.
            state = _STATE_ROOT
        else:
            raise RuntimeError("BUG: Unrecognized state.")

    # Get the ending position (one before the position of the last token).
    ending_pos = token_list[-1].get_position() - 1

    # Initialize the parenthesis-mismatched flag.
    mismatch_flag = False

    # Pre-create an error (it is only raised if an unclosed parenthesis is found below).
    err = _cm_error.Error(
        _ml_error.MOLECULE_PARENTHESIS_MISMATCH,
        _l10n_reg.get_message(
            lang_id,
            "parser.molecule.error.parenthesis_mismatch.description"),
        options)

    # Walk from the current node up to the root. Any parenthesis node on this path
    # was opened but never closed.
    while node is not None:
        # Register the ending position of current working node.
        node.register_ending_position_in_source_text(ending_pos)

        # Mark the error flag and add an error description if current node is a parenthesis node.
        if node.is_parenthesis():
            mismatch_flag = True
            err.push_traceback(
                expression,
                node.get_starting_position_in_source_text(),
                node.get_starting_position_in_source_text(),
                _l10n_reg.get_message(
                    lang_id,
                    "parser.molecule.error.parenthesis_mismatch.right"))

        # Go to parent node.
        node = node.get_parent_node()

    # Raise an error if we have met at least 1 parenthesis node.
    if mismatch_flag:
        raise err

    # Now the whole AST is constructed, but it contains a lot of useless hydrate group nodes.
    # So we have to remove them (all hydrate group nodes which have only 1 child).

    # Get iterate order.
    unpack_order = _ml_ast_bfs.do_bfs(root, True)

    # Initialize unpacked node container (id(node) -> the node which replaces it).
    unpacked = {}

    for node in unpack_order:
        if node.is_hydrate_group():
            assert isinstance(node, _ml_ast_base.ASTNodeHydrateGroup)

            if len(node) == 1:
                # Get the child node and reset its parent (the child takes the place of the group).
                child = unpacked[id(node[0])]
                child.set_parent_node(node.get_parent_node())

                # Save the unpack result.
                unpacked[id(node)] = child
            else:
                # Update children links.
                for child_id in range(0, len(node)):
                    node[child_id] = unpacked[id(node[child_id])]

                # Save the unpack result.
                unpacked[id(node)] = node
        elif node.is_molecule():
            assert isinstance(node, _ml_ast_base.ASTNodeMolecule)

            # Update children links.
            for child_id in range(0, len(node)):
                node[child_id] = unpacked[id(node[child_id])]

            # Save the unpack result.
            unpacked[id(node)] = node
        elif node.is_parenthesis():
            assert isinstance(node, _ml_ast_base.ASTNodeParenthesisWrapper)

            # Update children links.
            node.set_inner_node(unpacked[id(node.get_inner_node())])

            # Save the unpack result.
            unpacked[id(node)] = node
        else:
            # Other nodes have no child link to update. Save the unpack result.
            unpacked[id(node)] = node

    # Set molecule status (on the possibly replaced root node).
    root = unpacked[id(root)]
    """:type : bce.parser.ast.molecule.ASTNodeHydrateGroup | bce.parser.ast.molecule.ASTNodeMolecule"""
    root.set_status(molecule_status)

    return root
def print_ast(
        root_node,
        mexp_parser,
        mexp_protected_header_enabled=False,
        mexp_protected_header_prefix="X"
):
    """Print an AST to BCE expression (MathML objects).

    Every node is printed after its children (the order comes from
    ``_ml_ast_bfs.do_bfs(root_node, True)``). If the root node has a status, the status
    (surrounded by parentheses) is appended to the printed root object. If that object is
    not a row, it is wrapped in a new row first and the new row is returned.

    :type root_node: bce.parser.ast.molecule.ASTNodeHydrateGroup | bce.parser.ast.molecule.ASTNodeMolecule
    :type mexp_parser: bce.parser.interface.mexp_parser.MathExpressionParserInterface
    :type mexp_protected_header_enabled: bool
    :type mexp_protected_header_prefix: str
    :param root_node: The root node of the AST.
    :param mexp_parser: The math expression parser.
    :param mexp_protected_header_enabled: Whether the MEXP protected headers are enabled.
    :param mexp_protected_header_prefix: The prefix of the MEXP protected headers.
    :rtype : bce.dom.mathml.all.Base
    :return: The printed expression.
    :raise RuntimeError: If a node type or the status of the root node is not recognized.
    """

    #  The protected header settings are forwarded unchanged to every helper.
    header_options = {
        "mexp_protected_header_enabled": mexp_protected_header_enabled,
        "mexp_protected_header_prefix": mexp_protected_header_prefix
    }

    #  Get the printing order.
    work_order = _ml_ast_bfs.do_bfs(root_node, True)

    #  Initialize the printed result container (id(node) -> printed object).
    printed = {}

    for work_node in work_order:
        if work_node.is_hydrate_group():
            assert isinstance(work_node, _ml_ast_base.ASTNodeHydrateGroup)

            #  Initialize a row component to contain the printing result.
            build = _mathml.RowComponent()

            #  Print the prefix number part. A prefix other than one surrounds
            #  the children with parentheses.
            pfx = work_node.get_prefix_number().simplify()
            surround = pfx != _math_constant.ONE
            if surround:
                build.append_object(_print_operand(
                    pfx,
                    True,
                    mexp_parser,
                    **header_options
                ))
                build.append_object(_mathml.OperatorComponent(_mathml.OPERATOR_LEFT_PARENTHESIS))

            #  Print children nodes (separated with dots).
            build.append_object(printed[id(work_node[0])])
            for child_id in range(1, len(work_node)):
                build.append_object(_mathml.OperatorComponent(_mathml.OPERATOR_DOT))
                build.append_object(printed[id(work_node[child_id])])

            #  Complete the surrounding parentheses if the flag was marked.
            if surround:
                build.append_object(_mathml.OperatorComponent(_mathml.OPERATOR_RIGHT_PARENTHESIS))

            #  Save printing result.
            printed[id(work_node)] = build
        elif work_node.is_molecule():
            assert isinstance(work_node, _ml_ast_base.ASTNodeMolecule)

            #  Initialize a row component to contain the printing result.
            build = _mathml.RowComponent()

            #  Print the prefix number part.
            pfx = work_node.get_prefix_number().simplify()
            if pfx != _math_constant.ONE:
                build.append_object(_print_operand(
                    pfx,
                    True,
                    mexp_parser,
                    **header_options
                ))

            #  Print children nodes.
            for child_id in range(0, len(work_node)):
                build.append_object(printed[id(work_node[child_id])])

            #  Print the electronic part.
            el_charge = work_node.get_electronic_count().simplify()
            if not el_charge.is_zero:
                if len(work_node) == 0:
                    #  No child to attach the charge to, print it on an "e".
                    build.append_object(_mathml.SuperComponent(
                        _mathml.TextComponent("e"),
                        _print_super_electronic(
                            el_charge,
                            mexp_parser,
                            **header_options
                        )
                    ))
                else:
                    #  Find the innermost row component.
                    innermost = build
                    while innermost[-1].is_row():
                        innermost = innermost[-1]

                    #  Fetch the last item.
                    last_item = innermost[-1]

                    #  Add the electronic as a superscript of the last item. An item that
                    #  already has a subscript is rebuilt with both scripts.
                    if last_item.is_sub():
                        assert isinstance(last_item, _mathml.SubComponent)
                        last_item = _mathml.SubAndSuperComponent(
                            last_item.get_main_object(),
                            last_item.get_sub_object(),
                            _print_super_electronic(
                                el_charge,
                                mexp_parser,
                                **header_options
                            )
                        )
                    else:
                        last_item = _mathml.SuperComponent(
                            last_item,
                            _print_super_electronic(
                                el_charge,
                                mexp_parser,
                                **header_options
                            )
                        )

                    #  Save the modified item.
                    innermost[-1] = last_item

            #  Save printing result.
            printed[id(work_node)] = build
        elif work_node.is_atom():
            assert isinstance(work_node, _ml_ast_base.ASTNodeAtom)

            #  Print and save the result.
            printed[id(work_node)] = _print_suffix(
                _mathml.TextComponent(work_node.get_atom_symbol()),
                work_node,
                mexp_parser,
                **header_options
            )
        elif work_node.is_parenthesis():
            assert isinstance(work_node, _ml_ast_base.ASTNodeParenthesisWrapper)

            #  Initialize a row component to contain the printing result.
            build = _mathml.RowComponent()

            #  Print (the suffix is attached to the right parenthesis).
            build.append_object(_mathml.OperatorComponent(_mathml.OPERATOR_LEFT_PARENTHESIS))
            build.append_object(printed[id(work_node.get_inner_node())])
            build.append_object(_print_suffix(
                _mathml.OperatorComponent(_mathml.OPERATOR_RIGHT_PARENTHESIS),
                work_node,
                mexp_parser,
                **header_options
            ))

            #  Save printing result.
            printed[id(work_node)] = build
        elif work_node.is_abbreviation():
            assert isinstance(work_node, _ml_ast_base.ASTNodeAbbreviation)

            #  Print and save the result.
            printed[id(work_node)] = _print_suffix(
                _mathml.TextComponent("[%s]" % work_node.get_abbreviation_symbol()),
                work_node,
                mexp_parser,
                **header_options
            )
        else:
            raise RuntimeError("BUG: Unhandled AST node type.")

    #  Post process - add status.
    post_process = printed[id(root_node)]
    status = root_node.get_status()
    if status is not None:
        #  The status is appended to a row, so wrap anything else in a new row.
        if not post_process.is_row():
            tmp = _mathml.RowComponent()
            tmp.append_object(post_process)
            post_process = tmp

        post_process.append_object(_mathml.OperatorComponent(_mathml.OPERATOR_LEFT_PARENTHESIS))
        if status == _ml_ast_base.STATUS_GAS:
            post_process.append_object(_mathml.TextComponent("g"))
        elif status == _ml_ast_base.STATUS_LIQUID:
            post_process.append_object(_mathml.TextComponent("l"))
        elif status == _ml_ast_base.STATUS_SOLID:
            post_process.append_object(_mathml.TextComponent("s"))
        elif status == _ml_ast_base.STATUS_AQUEOUS:
            post_process.append_object(_mathml.TextComponent("aq"))
        else:
            raise RuntimeError("BUG: No such status.")
        post_process.append_object(_mathml.OperatorComponent(_mathml.OPERATOR_RIGHT_PARENTHESIS))

    #  Return the post-processed object. It may be a new wrapper row, in which case
    #  the object stored in the container does not contain the status.
    return post_process
def substitute_ast(root_node, subst_map):
    """Build a substituted copy of a molecule AST.

    Every prefix number, suffix number and electronic count found in the
    origin tree is run through ``.subs(subst_map).simplify()`` and then handed
    to ``_check_substituted_mexp``. New nodes are created for the copy; only
    getters are called on the origin nodes. A node is left out of the copy when
    its number becomes zero or when nothing remains inside it.

    :type root_node: bce.parser.ast.molecule.ASTNodeHydrateGroup | bce.parser.ast.molecule.ASTNodeMolecule
    :type subst_map: dict
    :param root_node: The root node of the origin AST.
    :param subst_map: The substitution map (passed to ``subs()`` as-is).
    :rtype : bce.parser.ast.molecule.ASTNodeHydrateGroup | bce.parser.ast.molecule.ASTNodeMolecule | None
    :return: The root node of the new AST (None if the whole tree was eliminated).
    :raise _ml_interface.SubstituteError: If the new root node carries the substitution error mark.
    :raise RuntimeError: If a node of an unrecognized kind is met.
    """

    # Short alias for the property key used to mark "this subtree is invalid".
    error_key = _PROPERTY_KEY_SUBSTITUTION_ERROR_RAISED

    def _substitute(number):
        # Substitute, simplify and then run the shared check on the result.
        result = number.subs(subst_map).simplify()
        _check_substituted_mexp(result)
        return result

    def _inherit_error(parent, child):
        # A parent is marked as soon as one of its children is marked.
        if child.get_property(error_key, False):
            parent.set_property(error_key, True)

    # Substituted node (or None when eliminated) of each origin node, keyed by
    # the id() of the origin node.
    #   type: dict[int, bce.parser.ast.molecule._ASTNodeBaseML | None]
    results = {}

    # Walk from the leaves to the root, so the results of all children exist
    # before their parent is processed.
    for origin in _ast_bfs.do_bfs(root_node, True):
        key = id(origin)

        if origin.is_hydrate_group():
            assert isinstance(origin, _ast_base.ASTNodeHydrateGroup)

            # A zero prefix wipes out the whole group.
            prefix = _substitute(origin.get_prefix_number())
            if prefix.is_zero:
                results[key] = None
                continue

            group = _ast_base.ASTNodeHydrateGroup()
            group.set_prefix_number(prefix)

            # Attach every molecule that survived the substitution.
            for index in range(0, len(origin)):
                molecule = results[id(origin[index])]
                if molecule is None:
                    continue
                assert isinstance(molecule, _ast_base.ASTNodeMolecule)
                _inherit_error(group, molecule)
                molecule.set_parent_node(group)
                group.append_child(molecule)

            child_count = len(group)

            # Nothing survived: the group disappears as well.
            if child_count == 0:
                results[key] = None
                continue

            # Exactly one molecule survived: drop the group layer and fold the
            # group prefix into the prefix of that molecule.
            if child_count == 1:
                outer_prefix = group.get_prefix_number()
                survivor = group[0]
                assert isinstance(survivor, _ast_base.ASTNodeMolecule)
                merged = (outer_prefix * survivor.get_prefix_number()).simplify()
                if merged.is_zero:
                    results[key] = None
                else:
                    # Detach the molecule from the discarded group.
                    # noinspection PyTypeChecker
                    survivor.set_parent_node(None)
                    survivor.set_prefix_number(merged)
                    results[key] = survivor
                continue

            # Several molecules: a negative prefix on any of them marks the
            # group (the first hit is enough).
            for index in range(0, child_count):
                molecule = group[index]
                assert isinstance(molecule, _ast_base.ASTNodeMolecule)
                if molecule.get_prefix_number().simplify().is_negative:
                    group.set_property(error_key, True)
                    break

            results[key] = group
        elif origin.is_molecule():
            assert isinstance(origin, _ast_base.ASTNodeMolecule)

            # A zero prefix wipes out the molecule.
            prefix = _substitute(origin.get_prefix_number())
            if prefix.is_zero:
                results[key] = None
                continue

            molecule = _ast_base.ASTNodeMolecule()

            # Electronic count first, then the prefix number.
            charge = _substitute(origin.get_electronic_count())
            molecule.set_electronic_count(charge)
            molecule.set_prefix_number(prefix)

            # Attach every child that survived the substitution.
            for index in range(0, len(origin)):
                part = results[id(origin[index])]
                if part is not None:
                    _inherit_error(molecule, part)
                    part.set_parent_node(molecule)
                    molecule.append_child(part)

            # The molecule is kept unless it has neither children nor a
            # non-zero electronic count.
            is_empty = len(molecule) == 0 and \
                molecule.get_electronic_count().simplify().is_zero
            results[key] = None if is_empty else molecule
        elif origin.is_atom():
            assert isinstance(origin, _ast_base.ASTNodeAtom)

            atom = _ast_base.ASTNodeAtom(origin.get_atom_symbol())

            # A zero suffix removes the atom, a negative one marks it.
            suffix = _substitute(origin.get_suffix_number())
            if suffix.is_zero:
                results[key] = None
                continue
            if suffix.is_negative:
                atom.set_property(error_key, True)

            atom.set_suffix_number(suffix)
            results[key] = atom
        elif origin.is_parenthesis():
            assert isinstance(origin, _ast_base.ASTNodeParenthesisWrapper)

            suffix = _substitute(origin.get_suffix_number())

            # Result of the wrapped node (it was processed earlier).
            inner = results[id(origin.get_inner_node())]
            assert inner is None or isinstance(
                inner,
                (_ast_base.ASTNodeHydrateGroup, _ast_base.ASTNodeMolecule)
            )

            # A zero suffix or an eliminated inner node removes the wrapper.
            if suffix.is_zero or inner is None:
                results[key] = None
                continue

            wrapper = _ast_base.ASTNodeParenthesisWrapper(inner)
            inner.set_parent_node(wrapper)
            wrapper.set_suffix_number(suffix)

            # Marked when the suffix is negative, when the prefix number of the
            # inner node is negative, or when the inner node is already marked.
            invalid = suffix.is_negative or \
                inner.get_prefix_number().simplify().is_negative or \
                inner.get_property(error_key, False)
            if invalid:
                wrapper.set_property(error_key, True)

            results[key] = wrapper
        elif origin.is_abbreviation():
            assert isinstance(origin, _ast_base.ASTNodeAbbreviation)

            abbreviation = _ast_base.ASTNodeAbbreviation(
                origin.get_abbreviation_symbol())

            # Same suffix rules as for an atom.
            suffix = _substitute(origin.get_suffix_number())
            if suffix.is_zero:
                results[key] = None
                continue
            if suffix.is_negative:
                abbreviation.set_property(error_key, True)

            abbreviation.set_suffix_number(suffix)
            results[key] = abbreviation
        else:
            raise RuntimeError("BUG: Unrecognized node.")

    # Fetch the result of the root node.
    new_root = results[id(root_node)]
    assert new_root is None or isinstance(
        new_root,
        (_ast_base.ASTNodeHydrateGroup, _ast_base.ASTNodeMolecule)
    )

    if new_root is not None:
        # A mark that reached the root turns into a real error.
        if new_root.get_property(error_key, False):
            raise _ml_interface.SubstituteError(
                "An error occurred when do substitution on the molecule.")

        # The new tree takes over the status of the origin molecule.
        new_root.set_status(root_node.get_status())

    # The mark is internal bookkeeping only: strip it from every node that
    # was produced.
    for node in results.values():
        if node is not None and node.has_property(error_key):
            node.remove_property(error_key)

    return new_root