def substitute_ast(root_node, subst_map):
    """Apply a substitution to an AST and return the result as a new AST.

    Every prefix number, suffix number and electronic count of the origin
    tree is substituted with the given map and simplified. The origin nodes
    are visited from the leaves to the root; a node whose number becomes
    zero, or which ends up with no content, is dropped from the new tree.

    :type root_node: _ast_base.ASTNodeHydrateGroup | _ast_base.ASTNodeMolecule
    :type subst_map: dict
    :param root_node: The root node of the origin AST.
    :param subst_map: The substitution map.
    :rtype : _ast_base.ASTNodeHydrateGroup | _ast_base.ASTNodeMolecule | None
    :return: The root node of the new AST (None if the root was dropped).
    :raise SubstituteError: If the new root carries the substitution-error mark.
    """

    # Property name used to propagate "something went negative" upwards.
    error_mark = "_substitution_error"

    # New counterpart of each origin node, keyed by id(); None means "dropped".
    # Expected value type: _ast_base._ASTNodeBaseML | None.
    rebuilt = {}

    def evaluate(mexp):
        """Substitute, simplify and validate one math expression."""
        value = mexp.subs(subst_map).simplify()
        _check_substituted_mexp(value)
        return value

    def adopt_children(origin, parent, molecules_only):
        """Move the surviving rebuilt children of |origin| under |parent|."""
        for index in range(len(origin)):
            child = rebuilt[id(origin[index])]
            if child is None:
                continue
            if molecules_only:
                assert isinstance(child, _ast_base.ASTNodeMolecule)

            # A marked child marks its new parent as well.
            if child.get_property(error_mark, False):
                parent.set_property(error_mark, True)

            child.set_parent_node(parent)
            parent.append_child(child)

    def finish_suffixed_leaf(origin, leaf):
        """Give |leaf| the substituted suffix of |origin| (None if it is 0)."""
        suffix = evaluate(origin.get_suffix_number())
        if suffix.is_zero:
            return None
        if suffix.is_negative:
            leaf.set_property(error_mark, True)
        leaf.set_suffix_number(suffix)
        return leaf

    # The BFS helper is asked for the reversed order (leaves first), so every
    # child has already been rebuilt when its parent is reached.
    for origin in _ast_bfs.do_bfs(root_node, True):
        key = id(origin)

        if origin.is_hydrate_group():
            assert isinstance(origin, _ast_base.ASTNodeHydrateGroup)

            multiplier = evaluate(origin.get_prefix_number())
            if multiplier.is_zero:
                rebuilt[key] = None
                continue

            group = _ast_base.ASTNodeHydrateGroup()
            group.set_prefix_number(multiplier)
            adopt_children(origin, group, True)

            survivors = len(group)
            if survivors == 0:
                # Nothing is left inside the group.
                rebuilt[key] = None
            elif survivors == 1:
                # A group holding a single molecule is replaced by that
                # molecule, whose prefix absorbs the prefix of the group.
                group_prefix = group.get_prefix_number()
                only = group[0]
                assert isinstance(only, _ast_base.ASTNodeMolecule)
                merged = (group_prefix * only.get_prefix_number()).simplify()
                if merged.is_zero:
                    rebuilt[key] = None
                else:
                    # noinspection PyTypeChecker
                    only.set_parent_node(None)
                    only.set_prefix_number(merged)
                    rebuilt[key] = only
            else:
                # Mark the group as soon as one molecule has a negative prefix.
                for index in range(survivors):
                    member = group[index]
                    assert isinstance(member, _ast_base.ASTNodeMolecule)
                    if member.get_prefix_number().simplify().is_negative:
                        group.set_property(error_mark, True)
                        break
                rebuilt[key] = group
            continue

        if origin.is_molecule():
            assert isinstance(origin, _ast_base.ASTNodeMolecule)

            multiplier = evaluate(origin.get_prefix_number())
            if multiplier.is_zero:
                rebuilt[key] = None
                continue

            molecule = _ast_base.ASTNodeMolecule()
            molecule.set_electronic_count(
                evaluate(origin.get_electronic_count()))
            molecule.set_prefix_number(multiplier)
            adopt_children(origin, molecule, False)

            # A molecule with neither children nor charge has no content.
            is_empty = len(molecule) == 0 and \
                molecule.get_electronic_count().simplify().is_zero
            rebuilt[key] = None if is_empty else molecule
            continue

        if origin.is_atom():
            assert isinstance(origin, _ast_base.ASTNodeAtom)
            rebuilt[key] = finish_suffixed_leaf(
                origin, _ast_base.ASTNodeAtom(origin.get_atom_symbol()))
            continue

        if origin.is_parenthesis():
            assert isinstance(origin, _ast_base.ASTNodeParenthesisWrapper)

            suffix = evaluate(origin.get_suffix_number())

            inner = rebuilt[id(origin.get_inner_node())]
            assert inner is None or isinstance(
                inner,
                (_ast_base.ASTNodeHydrateGroup, _ast_base.ASTNodeMolecule))

            # Drop the wrapper if it is repeated 0 times or wraps nothing.
            if suffix.is_zero or inner is None:
                rebuilt[key] = None
                continue

            wrapper = _ast_base.ASTNodeParenthesisWrapper(inner)
            inner.set_parent_node(wrapper)
            wrapper.set_suffix_number(suffix)

            # Negative suffix, negative inner prefix or an already marked
            # inner node all mark the wrapper.
            if suffix.is_negative or \
                    inner.get_prefix_number().simplify().is_negative or \
                    inner.get_property(error_mark, False):
                wrapper.set_property(error_mark, True)

            rebuilt[key] = wrapper
            continue

        if origin.is_abbreviation():
            assert isinstance(origin, _ast_base.ASTNodeAbbreviation)
            rebuilt[key] = finish_suffixed_leaf(
                origin,
                _ast_base.ASTNodeAbbreviation(
                    origin.get_abbreviation_symbol()))
            continue

        raise RuntimeError("BUG: Unrecognized node.")

    # Fetch the counterpart of the origin root.
    new_root = rebuilt[id(root_node)]
    assert new_root is None or isinstance(
        new_root, (_ast_base.ASTNodeHydrateGroup, _ast_base.ASTNodeMolecule))

    if new_root is not None:
        # The mark is only turned into an exception at the root.
        if new_root.get_property(error_mark, False):
            raise SubstituteError(
                "An error occurred when do substitution on the molecule.")

        # Carry the molecule status over to the new tree.
        new_root.set_status(root_node.get_status())

    return new_root
def decompile_ast(root_node, options):
    """Decompile an AST to a MathML component tree.

    Nodes are visited from the leaves to the root; each node is turned into
    a _mathml component built from the components of its children. When the
    root node has a status, a parenthesized status text ("g", "l", "s" or
    "aq") is appended to the result.

    NOTE(review): this file also contains a later definition named
    decompile_ast(root_node). If both really live in one module, the later
    one shadows this one - confirm they belong to separate modules.

    :type root_node: _ml_ast_base.ASTNodeHydrateGroup | _ml_ast_base.ASTNodeMolecule
    :type options: _opt.Option
    :param root_node: The root node of the AST.
    :param options: The BCE options.
    :return: The decompiled expression (a _mathml component).
    :raise RuntimeError: If a node type or the molecule status is unknown.
    """

    # Get the decompile order (reversed BFS, so children come first).
    work_order = _ml_ast_bfs.do_bfs(root_node, True)

    # Initialize the decompiling result container (keyed by id() of the node).
    decompiled = {}

    for work_node in work_order:
        if work_node.is_hydrate_group():
            assert isinstance(work_node, _ml_ast_base.ASTNodeHydrateGroup)

            # Initialize a row component to contain the decompiling result.
            build = _mathml.RowComponent()

            # Decompile the prefix number part. A prefix other than 1 is
            # written as "<prefix>(" ... ")" around the whole group.
            pfx = work_node.get_prefix_number().simplify()
            if pfx != _math_cst.ONE:
                build.append_object(_decompile_operand(pfx, True, options))
                build.append_object(_mathml.OperatorComponent(
                    _mathml.OPERATOR_LEFT_PARENTHESIS))
                surround = True
            else:
                surround = False

            # Decompile children nodes, separated by the dot operator.
            build.append_object(decompiled[id(work_node[0])])
            for child_id in range(1, len(work_node)):
                build.append_object(
                    _mathml.OperatorComponent(_mathml.OPERATOR_DOT))
                build.append_object(decompiled[id(work_node[child_id])])

            # Complete the surrounding parentheses if the flag was marked.
            if surround:
                build.append_object(_mathml.OperatorComponent(
                    _mathml.OPERATOR_RIGHT_PARENTHESIS))

            # Save decompiling result.
            decompiled[id(work_node)] = build
        elif work_node.is_molecule():
            assert isinstance(work_node, _ml_ast_base.ASTNodeMolecule)

            # Initialize a row component to contain the decompiling result.
            build = _mathml.RowComponent()

            # Decompile the prefix number part (omitted when it equals 1).
            pfx = work_node.get_prefix_number().simplify()
            if pfx != _math_cst.ONE:
                build.append_object(_decompile_operand(pfx, True, options))

            # Decompile children nodes.
            for child_id in range(0, len(work_node)):
                build.append_object(decompiled[id(work_node[child_id])])

            # Decompile the electronic part.
            el_charge = work_node.get_electronic_count().simplify()
            if not el_charge.is_zero:
                if len(work_node) == 0:
                    # No children: the charge is attached to a bare "e".
                    build.append_object(_mathml.SuperComponent(
                        _mathml.TextComponent("e"),
                        _decompile_super_electronic(el_charge, options)))
                else:
                    # Find the innermost row component.
                    innermost = build
                    while innermost[-1].is_row():
                        innermost = innermost[-1]

                    # Fetch the last item.
                    last_item = innermost[-1]

                    # Add the electronic as a superscript of the last item;
                    # an existing subscript is kept beside it.
                    if last_item.is_sub():
                        assert isinstance(last_item, _mathml.SubComponent)
                        last_item = _mathml.SubAndSuperComponent(
                            last_item.get_main_object(),
                            last_item.get_sub_object(),
                            _decompile_super_electronic(el_charge, options))
                    else:
                        last_item = _mathml.SuperComponent(
                            last_item,
                            _decompile_super_electronic(el_charge, options))

                    # Save the modified item.
                    innermost[-1] = last_item

            # Save decompiling result.
            decompiled[id(work_node)] = build
        elif work_node.is_atom():
            assert isinstance(work_node, _ml_ast_base.ASTNodeAtom)

            # Decompile and save the result.
            decompiled[id(work_node)] = _decompile_suffix(
                _mathml.TextComponent(work_node.get_atom_symbol()),
                work_node,
                options)
        elif work_node.is_parenthesis():
            assert isinstance(work_node,
                              _ml_ast_base.ASTNodeParenthesisWrapper)

            # Initialize a row component to contain the decompiling result.
            build = _mathml.RowComponent()

            # Decompile: "(" inner ")" where the suffix is attached to ")".
            build.append_object(_mathml.OperatorComponent(
                _mathml.OPERATOR_LEFT_PARENTHESIS))
            build.append_object(decompiled[id(work_node.get_inner_node())])
            build.append_object(_decompile_suffix(
                _mathml.OperatorComponent(
                    _mathml.OPERATOR_RIGHT_PARENTHESIS),
                work_node,
                options))

            # Save decompiling result.
            decompiled[id(work_node)] = build
        elif work_node.is_abbreviation():
            assert isinstance(work_node, _ml_ast_base.ASTNodeAbbreviation)

            # Decompile and save the result.
            decompiled[id(work_node)] = _decompile_suffix(
                _mathml.TextComponent(
                    "[%s]" % work_node.get_abbreviation_symbol()),
                work_node,
                options)
        else:
            raise RuntimeError("Never reach this condition.")

    # Post process: append the molecule status, if there is one.
    post_process = decompiled[id(root_node)]
    status = root_node.get_status()
    if status is not None:
        # The status is appended to a row; wrap the result if it is not one.
        if not post_process.is_row():
            tmp = _mathml.RowComponent()
            tmp.append_object(post_process)
            post_process = tmp

        post_process.append_object(_mathml.OperatorComponent(
            _mathml.OPERATOR_LEFT_PARENTHESIS))
        if status == _ml_status.STATUS_GAS:
            post_process.append_object(_mathml.TextComponent("g"))
        elif status == _ml_status.STATUS_LIQUID:
            post_process.append_object(_mathml.TextComponent("l"))
        elif status == _ml_status.STATUS_SOLID:
            post_process.append_object(_mathml.TextComponent("s"))
        elif status == _ml_status.STATUS_AQUEOUS:
            post_process.append_object(_mathml.TextComponent("aq"))
        else:
            raise RuntimeError("BUG: No such molecule status.")
        post_process.append_object(_mathml.OperatorComponent(
            _mathml.OPERATOR_RIGHT_PARENTHESIS))

    # Return the post-processed object. Returning decompiled[id(root_node)]
    # would drop the status whenever the result had to be wrapped above.
    return post_process
def decompile_ast(root_node):
    """Turn an AST back into the text of a BCE expression.

    The tree is walked from the leaves to the root and the text of every
    node is assembled from the texts of its children. A status suffix
    ("(g)", "(l)", "(s)" or "(aq)") is appended when the root has one.

    :type root_node: _ml_ast_base.ASTNodeHydrateGroup | _ml_ast_base.ASTNodeMolecule
    :param root_node: The root node of the AST.
    :rtype : str
    :return: The decompiled expression.
    """

    # Text of each already processed node, keyed by id() of the node.
    rendered = {}

    for current in _ml_ast_bfs.do_bfs(root_node, True):
        if current.is_hydrate_group():
            assert isinstance(current, _ml_ast_base.ASTNodeHydrateGroup)

            # A prefix other than 1 wraps the group: "<prefix>(...)".
            coefficient = current.get_prefix_number().simplify()
            if coefficient != _math_cst.ONE:
                template = _decompile_operand(coefficient) + "(%s)"
            else:
                template = "%s"

            # Children are joined with the hydrate dot.
            pieces = [rendered[id(current[0])]]
            for index in range(1, len(current)):
                pieces.append(rendered[id(current[index])])

            rendered[id(current)] = template % ".".join(pieces)
        elif current.is_molecule():
            assert isinstance(current, _ml_ast_base.ASTNodeMolecule)

            # Prefix first, then every child in order.
            coefficient = current.get_prefix_number().simplify()
            pieces = [_decompile_operand(coefficient)]
            for index in range(len(current)):
                pieces.append(rendered[id(current[index])])

            # The electronic part is only written for a non-zero charge.
            charge = current.get_electronic_count().simplify()
            if not charge.is_zero:
                pieces.append(_decompile_electronic(charge))

            rendered[id(current)] = "".join(pieces)
        elif current.is_atom():
            assert isinstance(current, _ml_ast_base.ASTNodeAtom)

            symbol = current.get_atom_symbol()
            rendered[id(current)] = symbol + _decompile_suffix(current)
        elif current.is_parenthesis():
            assert isinstance(current, _ml_ast_base.ASTNodeParenthesisWrapper)

            inner_text = rendered[id(current.get_inner_node())]
            suffix_text = _decompile_suffix(current)
            rendered[id(current)] = "(%s)%s" % (inner_text, suffix_text)
        elif current.is_abbreviation():
            assert isinstance(current, _ml_ast_base.ASTNodeAbbreviation)

            symbol = current.get_abbreviation_symbol()
            suffix_text = _decompile_suffix(current)
            rendered[id(current)] = "[%s]%s" % (symbol, suffix_text)
        else:
            raise RuntimeError("Never reach this condition.")

    # Append the status of the molecule (if any) to the text of the root.
    expression = rendered[id(root_node)]
    if root_node.is_gas_status():
        return expression + "(g)"
    if root_node.is_liquid_status():
        return expression + "(l)"
    if root_node.is_solid_status():
        return expression + "(s)"
    if root_node.is_aqueous_status():
        return expression + "(aq)"
    return expression
def parse_ast(expression, root_node, options):
    """Collect the atoms (and electrons) described by an AST.

    The tree is processed from the leaves to the root. Each node gets a
    MergeUtil holding its atoms multiplied by its own prefix/suffix number;
    the data of the root node is the final result.

    :type expression: str
    :type root_node: _ast_base.ASTNodeHydrateGroup | _ast_base.ASTNodeMolecule
    :type options: _opt.Option
    :param expression: The origin expression.
    :param root_node: The root node of the AST.
    :param options: The BCE options.
    :rtype : dict
    :return: The parsed atoms dictionary.
    :raise _pe.Error: If some part of the molecule has no content, or an
        abbreviation can't be resolved.
    """

    def new_no_content_error():
        """Create a 'no content' error without any traceback entry."""
        return _pe.Error(_ml_error.PE_ML_NO_CONTENT,
                         _msg_id.MSG_PE_ML_NO_CONTENT_DESCRIPTION,
                         options)

    # MergeUtil of each processed node, keyed by id() of the node.
    results = {}

    for current in _ast_bfs.do_bfs(root_node, True):
        if current.is_hydrate_group() or current.is_molecule():
            assert isinstance(
                current,
                (_ast_base.ASTNodeHydrateGroup, _ast_base.ASTNodeMolecule))

            multiplier = current.get_prefix_number()
            merged = MergeUtil()

            # Only molecules carry an electronic count; it is stored as "e".
            if current.is_molecule():
                charge = current.get_electronic_count().simplify()
                if not charge.is_zero:
                    merged.add("e", charge * multiplier)

            child_count = len(current)
            for index in range(child_count):
                child = current[index]
                child_result = results[id(child)]

                # An empty molecule inside a hydrate group is an error. The
                # reported range depends on where the empty molecule sits.
                if current.is_hydrate_group() and len(child_result) == 0:
                    assert isinstance(child, _ast_base.ASTNodeMolecule)

                    err = new_no_content_error()
                    if index == 0:
                        spot = child.get_ending_position_in_source_text() + 1
                        err.push_traceback_ex(
                            expression,
                            spot,
                            spot,
                            _msg_id.MSG_PE_ML_NO_CONTENT_BEFORE)
                    elif index == child_count - 1:
                        spot = \
                            child.get_starting_position_in_source_text() - 1
                        err.push_traceback_ex(
                            expression,
                            spot,
                            spot,
                            _msg_id.MSG_PE_ML_NO_CONTENT_AFTER)
                    else:
                        err.push_traceback_ex(
                            expression,
                            child.get_starting_position_in_source_text() - 1,
                            child.get_ending_position_in_source_text() + 1,
                            _msg_id.MSG_PE_ML_NO_CONTENT_INSIDE)
                    raise err

                merged.merge(child_result, multiplier)

            _macro_simplify(expression, merged, current, options)
            results[id(current)] = merged
        elif current.is_atom():
            assert isinstance(current, _ast_base.ASTNodeAtom)

            # A single atom, counted as many times as its suffix says.
            multiplier = current.get_suffix_number()
            merged = MergeUtil()
            merged.add(current.get_atom_symbol(), multiplier)
            results[id(current)] = merged
        elif current.is_parenthesis():
            assert isinstance(current, _ast_base.ASTNodeParenthesisWrapper)

            multiplier = current.get_suffix_number()
            merged = MergeUtil()
            inner_result = results[id(current.get_inner_node())]

            # "()" with nothing inside is an error.
            if len(inner_result) == 0:
                err = new_no_content_error()
                err.push_traceback_ex(
                    expression,
                    current.get_starting_position_in_source_text(),
                    current.get_right_parenthesis_position(),
                    _msg_id.MSG_PE_ML_NO_CONTENT_INSIDE)
                raise err

            merged.merge(inner_result, multiplier)
            _macro_simplify(expression, merged, current, options)
            results[id(current)] = merged
        elif current.is_abbreviation():
            assert isinstance(current, _ast_base.ASTNodeAbbreviation)

            symbol = current.get_abbreviation_symbol()

            # An abbreviation without a symbol is an error.
            if len(symbol) == 0:
                err = new_no_content_error()
                err.push_traceback_ex(
                    expression,
                    current.get_starting_position_in_source_text(),
                    current.get_right_parenthesis_position(),
                    _msg_id.MSG_PE_ML_NO_CONTENT_INSIDE)
                raise err

            # The user dictionary (when enabled) has priority over the
            # system one.
            composition = None
            if options.is_user_abbreviation_dictionary_enabled():
                user_defined = options.get_user_abbreviation_dictionary()
                if symbol in user_defined:
                    composition = user_defined[symbol]
            if composition is None:
                if symbol in _ml_abbr.ABBREVIATIONS:
                    composition = _ml_abbr.ABBREVIATIONS[symbol]

            # Neither dictionary knows the abbreviation.
            if composition is None:
                err = _pe.Error(
                    _ml_error.PE_ML_UNSUPPORTED_ABBREVIATION,
                    _msg_id.MSG_PE_ML_UNSUPPORTED_ABBREVIATION_DESCRIPTION,
                    options)
                err.push_traceback_ex(
                    expression,
                    current.get_starting_position_in_source_text() + 1,
                    current.get_right_parenthesis_position() - 1,
                    _msg_id.MSG_PE_ML_UNSUPPORTED_ABBREVIATION_TB_MESSAGE)
                raise err

            # Expand the abbreviation into its atoms.
            merged = MergeUtil()
            multiplier = current.get_suffix_number()
            for atom_symbol in composition:
                merged.add(atom_symbol, composition[atom_symbol] * multiplier)

            _macro_simplify(expression, merged, current, options)
            results[id(current)] = merged
        else:
            raise RuntimeError("Never reach this condition.")

    # The whole expression must contain something.
    root_result = results[id(root_node)]
    if len(root_result) == 0:
        err = new_no_content_error()
        err.push_traceback_ex(expression,
                              0,
                              len(expression) - 1,
                              _msg_id.MSG_PE_ML_NO_CONTENT_INSIDE)
        raise err

    return root_result.get_data()
def generate_ast(expression, token_list, options):
    """Generate an AST from the token list.

    The token list is consumed by a state machine. In the root state the
    kind of the current token selects the next state; every other state
    handles one syntax element (atom, abbreviation, parenthesis, electronic
    descriptor, hydrate dot, prefix/suffix number, molecule status), moves
    the cursor and returns to the root state (or to the suffix-number
    state). After the end token, hydrate groups that own exactly one child
    are removed from the tree.

    :type expression: str
    :type token_list: list[_ml_token.Token]
    :type options: _opt.Option
    :param expression: The origin expression.
    :param token_list: The token list.
    :param options: The BCE options.
    :rtype : _ast_base.ASTNodeHydrateGroup | _ast_base.ASTNodeMolecule
    :return: The root node of the generated AST.
    :raise _pe.Error: On an unexpected token, mismatched parentheses, a
        zero/negative number or a useless operand (an explicit 1).
    :raise RuntimeError: On an internal inconsistency (marked "BUG").
    """

    # Initialize the molecule status container (None = no status token seen).
    molecule_status = None

    # Initialize the state machine.
    state = _STATE_ROOT

    # Generate initial AST: a hydrate group that owns one (empty) molecule.
    root = _ast_base.ASTNodeHydrateGroup()
    node = _ast_base.ASTNodeMolecule(root)
    root.append_child(node)

    # Register the starting position.
    root.register_starting_position_in_source_text(0)
    node.register_starting_position_in_source_text(0)

    # Initialize the token cursor.
    cursor = 0

    while True:
        # Get current token.
        token = token_list[cursor]

        if state == _STATE_ROOT:
            # Find molecule in parent nodes and current node (the node
            # pointer may still point to an atom/abbreviation/parenthesis).
            while node is not None and not node.is_molecule():
                node = node.get_parent_node()
            if node is None:
                raise RuntimeError("BUG: Can't find molecule group.")

            # Redirect by rules. An operand is only accepted as a prefix
            # number, i.e. while the molecule has no child yet. No token is
            # consumed here; the target state does that.
            if token.is_operand() and len(node) == 0:
                state = _STATE_PREFIX_NUMBER
            elif token.is_symbol():
                state = _STATE_ATOM
            elif token.is_abbreviation():
                state = _STATE_ABBREVIATION
            elif token.is_left_parenthesis():
                state = _STATE_LEFT_PARENTHESIS
            elif token.is_right_parenthesis():
                state = _STATE_RIGHT_PARENTHESIS
            elif token.is_electronic_begin():
                state = _STATE_ELECTRONIC
            elif token.is_hydrate_dot():
                state = _STATE_HYDRATE_DOT
            elif token.is_status():
                state = _STATE_MOLECULE_STATUS
            elif token.is_end():
                # The end token terminates the state machine.
                break
            else:
                # Raise an error if the token can't be recognized.
                err = _pe.Error(_ml_error.PE_ML_UNEXPECTED_TOKEN,
                                _msg_id.MSG_PE_ML_UNEXPECTED_TOKEN_DESCRIPTION,
                                options)
                err.push_traceback_ex(expression,
                                      token.get_position(),
                                      token.get_position() + len(token.get_symbol()) - 1,
                                      _msg_id.MSG_PE_ML_UNEXPECTED_TOKEN_DEFAULT)
                raise err
        elif state == _STATE_ATOM:
            # Create a new atom node and register its starting position.
            new_node = _ast_base.ASTNodeAtom(token.get_symbol(), node)
            new_node.register_starting_position_in_source_text(token.get_position())

            # Add the node to the molecule group.
            node.append_child(new_node)

            # Switch the node pointer to the new created node.
            node = new_node

            # Next token.
            cursor += 1

            # Go to read the suffix number.
            state = _STATE_SUFFIX_NUMBER
        elif state == _STATE_ABBREVIATION:
            # Create a new abbreviation node and register its starting
            # position. The first and the last character of the token symbol
            # are stripped (presumably the surrounding brackets - confirm
            # against the tokenizer).
            new_node = _ast_base.ASTNodeAbbreviation(token.get_symbol()[1:-1], node)
            new_node.register_starting_position_in_source_text(token.get_position())

            # Add the node to the molecule group.
            node.append_child(new_node)

            # Switch the node pointer to the new created node.
            node = new_node

            # Next token.
            cursor += 1

            # Go to read the suffix number.
            state = _STATE_SUFFIX_NUMBER
        elif state == _STATE_LEFT_PARENTHESIS:
            # Create new nodes: parenthesis -> hydrate group -> molecule.
            new_hydrate_grp = _ast_base.ASTNodeHydrateGroup()
            new_molecule = _ast_base.ASTNodeMolecule(new_hydrate_grp)
            new_parenthesis = _ast_base.ASTNodeParenthesisWrapper(new_hydrate_grp, node)

            # Link them correctly and then add the new created parenthesis
            # node to the molecule group.
            new_hydrate_grp.set_parent_node(new_parenthesis)
            new_hydrate_grp.append_child(new_molecule)
            node.append_child(new_parenthesis)

            # Switch the node pointer to the new created molecule node.
            node = new_molecule

            # Register their starting positions (the content starts right
            # after the parenthesis token).
            new_hydrate_grp.register_starting_position_in_source_text(token.get_position() + 1)
            new_molecule.register_starting_position_in_source_text(token.get_position() + 1)
            new_parenthesis.register_starting_position_in_source_text(token.get_position())

            # Next token.
            cursor += 1

            # Go to root state.
            state = _STATE_ROOT
        elif state == _STATE_RIGHT_PARENTHESIS:
            # Find parenthesis node in parent nodes and current node.
            while node is not None and not node.is_parenthesis():
                # Register the ending position of current working node.
                node.register_ending_position_in_source_text(token.get_position() - 1)

                # Go to the parent node.
                node = node.get_parent_node()

            # Raise an error if the node can't be found (there is no
            # enclosing parenthesis node for this token).
            if node is None:
                err = _pe.Error(_ml_error.PE_ML_PARENTHESIS_MISMATCH,
                                _msg_id.MSG_PE_ML_PARENTHESIS_MISMATCH_DESCRIPTION,
                                options)
                err.push_traceback_ex(expression,
                                      token.get_position(),
                                      token.get_position(),
                                      _msg_id.MSG_PE_ML_PARENTHESIS_MISMATCH_MISSING_LEFT)
                raise err

            # Register the position of the right parenthesis.
            node.set_right_parenthesis_position(token.get_position())

            # Next token.
            cursor += 1

            # Go to read the suffix number.
            state = _STATE_SUFFIX_NUMBER
        elif state == _STATE_ELECTRONIC:
            # Expected token sequence: begin, operand(s), positivity flag,
            # end.

            # Save the starting position of the electronic descriptor.
            e_start_pos = token.get_position()

            # Next token.
            cursor += 1
            token = token_list[cursor]

            # Try to read the prefix number (consecutive operands are
            # multiplied together).
            e_pfx = _math_cst.ONE
            e_pfx_start = token.get_position()
            has_e_pfx_number = False
            while token.is_operand():
                # Mark the flag.
                has_e_pfx_number = True

                # Process the prefix number.
                e_pfx *= token.get_operand_value().simplify()

                # Next token.
                cursor += 1
                token = token_list[cursor]

            e_pfx = e_pfx.simplify()

            # Domain check.
            if e_pfx.is_negative or e_pfx.is_zero:
                err = _pe.Error(_ml_error.PE_ML_DOMAIN_ERROR,
                                _msg_id.MSG_PE_ML_DOMAIN_ERROR_DESCRIPTION,
                                options)
                err.push_traceback_ex(expression,
                                      e_pfx_start,
                                      token.get_position() - 1,
                                      _msg_id.MSG_PE_ML_DOMAIN_ERROR_EL_CHG)
                raise err

            # Validate (an explicitly written number equal to 1 is rejected).
            if has_e_pfx_number and e_pfx == _math_cst.ONE:
                err = _pe.Error(_ml_error.PE_ML_USELESS_OPERAND,
                                _msg_id.MSG_PE_ML_USELESS_OPERAND_DESCRIPTION,
                                options)
                err.push_traceback_ex(expression,
                                      e_pfx_start,
                                      token.get_position() - 1,
                                      _msg_id.MSG_PE_ML_USELESS_OPERAND_EL_CHG)
                raise err

            # Process the electronic positivity flag.
            if token.is_electronic_positive_flag():
                pass
            elif token.is_electronic_negative_flag():
                e_pfx = -e_pfx
            else:
                if token.is_end():
                    # The expression ended inside the descriptor.
                    err = _pe.Error(_ml_error.PE_ML_PARENTHESIS_MISMATCH,
                                    _msg_id.MSG_PE_ML_PARENTHESIS_MISMATCH_DESCRIPTION,
                                    options)
                    err.push_traceback_ex(expression,
                                          e_start_pos,
                                          token.get_position() - 1,
                                          _msg_id.MSG_PE_ML_PARENTHESIS_MISMATCH_MISSING_RIGHT)
                else:
                    # Raise an error if current working token is not an
                    # electronic positivity flag.
                    err = _pe.Error(_ml_error.PE_ML_UNEXPECTED_TOKEN,
                                    _msg_id.MSG_PE_ML_UNEXPECTED_TOKEN_DESCRIPTION,
                                    options)
                    err.push_traceback_ex(expression,
                                          token.get_position(),
                                          token.get_position() + len(token.get_symbol()) - 1,
                                          _msg_id.MSG_PE_ML_UNEXPECTED_TOKEN_EL_POSITIVITY_OR_INTEGER)
                raise err

            # Next token.
            cursor += 1
            token = token_list[cursor]

            # Raise an error if current working token is not the
            # electronic-end token.
            if not token.is_electronic_end():
                if token.is_end():
                    err = _pe.Error(_ml_error.PE_ML_PARENTHESIS_MISMATCH,
                                    _msg_id.MSG_PE_ML_PARENTHESIS_MISMATCH_DESCRIPTION,
                                    options)
                    err.push_traceback_ex(expression,
                                          e_start_pos,
                                          token.get_position() - 1,
                                          _msg_id.MSG_PE_ML_PARENTHESIS_MISMATCH_MISSING_RIGHT)
                else:
                    err = _pe.Error(_ml_error.PE_ML_UNEXPECTED_TOKEN,
                                    _msg_id.MSG_PE_ML_UNEXPECTED_TOKEN_DESCRIPTION,
                                    options)
                    err.push_traceback_ex(expression,
                                          token.get_position(),
                                          token.get_position() + len(token.get_symbol()) - 1,
                                          _msg_id.MSG_PE_ML_UNEXPECTED_TOKEN_EL_END)
                raise err

            # Next token.
            cursor += 1
            token = token_list[cursor]

            # Raise an error if the electronic descriptor is not at the end
            # of a molecule block.
            if not (token.is_right_parenthesis() or token.is_hydrate_dot() or token.is_end() or token.is_status()):
                err = _pe.Error(_ml_error.PE_ML_UNEXPECTED_TOKEN,
                                _msg_id.MSG_PE_ML_UNEXPECTED_TOKEN_DESCRIPTION,
                                options)
                err.push_traceback_ex(expression,
                                      e_start_pos,
                                      token.get_position() - 1,
                                      _msg_id.MSG_PE_ML_UNEXPECTED_TOKEN_EL_MISPLACED)
                raise err

            # Set the electronic count (the node pointer is a molecule here,
            # as guaranteed by the root state).
            node.set_electronic_count(e_pfx)

            # Go to root state.
            state = _STATE_ROOT
        elif state == _STATE_HYDRATE_DOT:
            # Save the ending position of current working node.
            node.register_ending_position_in_source_text(token.get_position() - 1)

            # Go to parent node.
            node = node.get_parent_node()
            assert isinstance(node, _ast_base.ASTNodeHydrateGroup)

            # Create a new molecule node and set its starting position.
            new_molecule = _ast_base.ASTNodeMolecule(node)
            new_molecule.register_starting_position_in_source_text(token.get_position() + 1)

            # Add the new created molecule node to the hydrate group node.
            node.append_child(new_molecule)

            # Switch the node pointer to the new created molecule node.
            node = new_molecule

            # Next token.
            cursor += 1

            # Go to root state.
            state = _STATE_ROOT
        elif state == _STATE_PREFIX_NUMBER:
            # Save the starting position of the prefix.
            pfx_start = token.get_position()

            # Read prefix numbers (consecutive operands are multiplied into
            # the prefix number of the molecule).
            has_pfx_number = False
            while token.is_operand():
                # Mark the flag.
                has_pfx_number = True

                # Process the prefix number.
                node.set_prefix_number(node.get_prefix_number() * token.get_operand_value().simplify())

                # Next token.
                cursor += 1
                token = token_list[cursor]

            pfx = node.get_prefix_number().simplify()

            # Domain check.
            if pfx.is_negative or pfx.is_zero:
                err = _pe.Error(_ml_error.PE_ML_DOMAIN_ERROR,
                                _msg_id.MSG_PE_ML_DOMAIN_ERROR_DESCRIPTION,
                                options)
                err.push_traceback_ex(expression,
                                      pfx_start,
                                      token.get_position() - 1,
                                      _msg_id.MSG_PE_ML_DOMAIN_ERROR_PFX)
                raise err

            # Validate (an explicitly written prefix equal to 1 is rejected).
            if has_pfx_number and pfx == _math_cst.ONE:
                err = _pe.Error(_ml_error.PE_ML_USELESS_OPERAND,
                                _msg_id.MSG_PE_ML_USELESS_OPERAND_DESCRIPTION,
                                options)
                err.push_traceback_ex(expression,
                                      pfx_start,
                                      token.get_position() - 1,
                                      _msg_id.MSG_PE_ML_USELESS_OPERAND_PFX)
                raise err

            # Set the prefix number.
            node.set_prefix_number(pfx)

            # Go to root state.
            state = _STATE_ROOT
        elif state == _STATE_SUFFIX_NUMBER:
            # Save the starting position of the suffix.
            sfx_start = token.get_position()

            # Read suffix numbers (the loop may run zero times; the suffix
            # number of the node is then left untouched).
            has_sfx_number = False
            while token.is_operand():
                # Mark the flag.
                has_sfx_number = True

                # Process the suffix number.
                node.set_suffix_number(node.get_suffix_number() * token.get_operand_value().simplify())

                # Next token.
                cursor += 1
                token = token_list[cursor]

            sfx = node.get_suffix_number().simplify()

            # Domain check.
            if sfx.is_negative or sfx.is_zero:
                err = _pe.Error(_ml_error.PE_ML_DOMAIN_ERROR,
                                _msg_id.MSG_PE_ML_DOMAIN_ERROR_DESCRIPTION,
                                options)
                err.push_traceback_ex(expression,
                                      sfx_start,
                                      token.get_position() - 1,
                                      _msg_id.MSG_PE_ML_DOMAIN_ERROR_SFX)
                raise err

            # Validate (an explicitly written suffix equal to 1 is rejected).
            if has_sfx_number and sfx == _math_cst.ONE:
                err = _pe.Error(_ml_error.PE_ML_USELESS_OPERAND,
                                _msg_id.MSG_PE_ML_USELESS_OPERAND_DESCRIPTION,
                                options)
                err.push_traceback_ex(expression,
                                      sfx_start,
                                      token.get_position() - 1,
                                      _msg_id.MSG_PE_ML_USELESS_OPERAND_SFX)
                raise err

            # Register the ending position of current working node.
            node.register_ending_position_in_source_text(token.get_position() - 1)

            # Go to root state.
            state = _STATE_ROOT
        elif state == _STATE_MOLECULE_STATUS:
            # Raise an error if the token is not at the end of the molecule
            # (the status token must be directly followed by the end token).
            if not token_list[cursor + 1].is_end():
                err = _pe.Error(_ml_error.PE_ML_UNEXPECTED_TOKEN,
                                _msg_id.MSG_PE_ML_UNEXPECTED_TOKEN_DESCRIPTION,
                                options)
                err.push_traceback_ex(expression,
                                      token.get_position(),
                                      token.get_position() + len(token.get_symbol()) - 1,
                                      _msg_id.MSG_PE_ML_UNEXPECTED_TOKEN_STATUS_MISPLACED)
                raise err

            # Fetch the molecule status.
            if token.is_gas_status():
                molecule_status = _ml_status.STATUS_GAS
            elif token.is_liquid_status():
                molecule_status = _ml_status.STATUS_LIQUID
            elif token.is_solid_status():
                molecule_status = _ml_status.STATUS_SOLID
            elif token.is_aqueous_status():
                molecule_status = _ml_status.STATUS_AQUEOUS
            else:
                raise RuntimeError("BUG: Unrecognized status.")

            # Next token.
            cursor += 1

            # Go to root state.
            state = _STATE_ROOT
        else:
            raise RuntimeError("BUG: Unrecognized state.")

    # Get the ending position (one before the position of the last token).
    ending_pos = token_list[-1].get_position() - 1

    # Initialize the parenthesis-mismatched flag.
    mismatch_flag = False

    # Pre-create an error. It is only raised if the walk below meets a
    # parenthesis node, i.e. a parenthesis that was never closed.
    err = _pe.Error(_ml_error.PE_ML_PARENTHESIS_MISMATCH,
                    _msg_id.MSG_PE_ML_PARENTHESIS_MISMATCH_DESCRIPTION,
                    options)

    # Walk from the current node up to the root.
    while node is not None:
        # Register the ending position of current working node.
        node.register_ending_position_in_source_text(ending_pos)

        # Mark the error flag and add an error description if current node
        # is a parenthesis node.
        if node.is_parenthesis():
            mismatch_flag = True
            err.push_traceback_ex(expression,
                                  node.get_starting_position_in_source_text(),
                                  node.get_starting_position_in_source_text(),
                                  _msg_id.MSG_PE_ML_PARENTHESIS_MISMATCH_MISSING_RIGHT)

        # Go to parent node.
        node = node.get_parent_node()

    # Raise an error if we have met at least 1 parenthesis node.
    if mismatch_flag:
        raise err

    # Now, we have constructed the whole AST, but we got a lot of useless
    # hydrate group nodes. So we have to remove them (all hydrate group
    # nodes which have only 1 child).

    # Get iterate order (leaves first, so children are unpacked before
    # their parents).
    unpack_order = _ast_bfs.do_bfs(root, True)

    # Initialize unpacked node container (id() of a node -> its replacement).
    unpacked = {}

    for node in unpack_order:
        if node.is_hydrate_group():
            assert isinstance(node, _ast_base.ASTNodeHydrateGroup)

            if len(node) == 1:
                # Get the child node and reset its parent, so the child
                # takes the place of the hydrate group.
                child = unpacked[id(node[0])]
                child.set_parent_node(node.get_parent_node())

                # Save the unpack result.
                unpacked[id(node)] = child
            else:
                # Update children links.
                for child_id in range(0, len(node)):
                    node[child_id] = unpacked[id(node[child_id])]

                # Save the unpack result.
                unpacked[id(node)] = node
        elif node.is_molecule():
            assert isinstance(node, _ast_base.ASTNodeMolecule)

            # Update children links.
            for child_id in range(0, len(node)):
                node[child_id] = unpacked[id(node[child_id])]

            # Save the unpack result.
            unpacked[id(node)] = node
        elif node.is_parenthesis():
            assert isinstance(node, _ast_base.ASTNodeParenthesisWrapper)

            # Update children links.
            node.set_inner_node(unpacked[id(node.get_inner_node())])

            # Save the unpack result.
            unpacked[id(node)] = node
        else:
            # Save the unpack result (other nodes are kept as they are).
            unpacked[id(node)] = node

    # Set molecule status (the root itself may have been replaced above).
    root = unpacked[id(root)]
    """:type : _ast_base.ASTNodeHydrateGroup | _ast_base.ASTNodeMolecule"""
    root.set_status(molecule_status)

    return root