def AttachMgToPhosphateChain(
    mol: Molecule, chain_map: Dict[Group, list], assigned_mgs: Set[int]
) -> Set[int]:
    """Attach Mg2+ ions to the appropriate groups in the chain.

    Every Mg2+ ion matched in the molecule that is not yet listed in
    'assigned_mgs' is attributed to a phosphate group, if one is available.
    Candidate groups are tried in a fixed order of preference: the
    'final' (terminal) phosphate mappings first, then the 'middle' ones and
    finally the 'ring' ones. The first group that still has a pending
    occurrence in 'chain_map' gives up its oldest occurrence: the Mg2+ atom
    index is added to that node set, and the node set is re-filed under the
    Mg-bound counterpart of the group.

    Both 'chain_map' and 'assigned_mgs' are modified in place.

    :param mol: the molecule.
    :param chain_map: the groups in the chain, mapped to their occurrences.
    :param assigned_mgs: the set of Mg2+ ions that are already assigned.
    :return: The updated set of assigned Mg2+ ions (the same object that
        was passed in).
    """
    # (plain group, Mg-bound group) pairs, ordered by preference.
    phosphate_to_mg_pairs = (
        GroupsData.FINAL_PHOSPHATES_TO_MGS
        + GroupsData.MIDDLE_PHOSPHATES_TO_MGS
        + GroupsData.RING_PHOSPHATES_TO_MGS
    )

    for mg_match in mol.FindSmarts("[Mg+2]"):
        mg_atom = mg_match[0]
        if mg_atom in assigned_mgs:
            continue

        for bare_group, bound_group in phosphate_to_mg_pairs:
            pending = chain_map[bare_group]
            if not pending:
                # Nothing left to bind in this group; try the next one.
                continue

            # Move the oldest occurrence over to the Mg-bound group.
            node_set = pending.pop(0)
            node_set.add(mg_atom)
            assigned_mgs.add(mg_atom)
            chain_map[bound_group].append(node_set)
            break

    return assigned_mgs
def Decompose(
    self,
    mol: Molecule,
    ignore_protonations: bool = False,
    raise_exception: bool = False,
) -> GroupDecomposition:
    """Break a molecule down into the groups of 'self.groups_data'.

    Groups are tried in the order in which they appear in the groups data.
    An occurrence of a group is accepted only when all of its focal atoms
    are still unassigned, so no atom is attributed to more than one
    occurrence.

    The flag 'ignore_protonations' should be used when decomposing a
    compound that lacks a protonation representation (for example, the
    KEGG database doesn't possess this information). If this flag is set to
    True, it overrides the '(C)harge sensitive' flag in the groups file
    (i.e. - *PC).

    :param mol: the molecule to decompose.
    :param ignore_protonations: whether to ignore protonation levels.
    :param raise_exception: whether to raise an error when some atoms are
        left unassigned.
    :return: A GroupDecomposition object containing the decomposition.
    :raises MalformedGroupDefinitionError: if a phosphate group neither
        ignores charges nor is charge sensitive (and protonations are not
        ignored).
    :raises GroupDecompositionError: if 'raise_exception' is True and the
        decomposition has unassigned nodes.
    """
    free_atoms = set(range(len(mol)))
    groups: List[Tuple[Group, List[Set[int]]]] = []

    def _claim(candidate):
        """Reserve the candidate atoms; refuse if any is already taken."""
        nonlocal free_atoms
        if not candidate.issubset(free_atoms):
            return False
        free_atoms -= candidate
        return True

    for group in self.groups_data.groups:
        if group.IsPhosphate():
            # Phosphate chains require a special treatment
            if group.IgnoreCharges() or ignore_protonations:
                skip_protonation = True
            elif group.ChargeSensitive():
                skip_protonation = False
            else:
                raise MalformedGroupDefinitionError(
                    "Unrecognized phosphate wildcard: %s" % group.name
                )

            chains = self.FindPhosphateChains(
                mol, ignore_protonations=skip_protonation
            )
            for phosphate_group, group_nodesets in chains:
                accepted = []
                for focal_set in group_nodesets:
                    # Only keep focal sets that don't override an
                    # already assigned node
                    if _claim(focal_set):
                        accepted.append(focal_set)
                groups.append((phosphate_group, accepted))
            continue

        if group.IsCodedCorrection():
            # 'Correction' groups get one empty set per counted occurrence.
            n_corrections = group.GetCorrection(mol)
            groups.append((group, [set() for _ in range(n_corrections)]))
            continue

        # Not a phosphate group or expanded correction.
        # TODO: if the 'ignore_protonation' flag is True,
        #  this should always use the pseudogroup with the lowest nH
        #  in each category regardless of the hydrogens in the given
        #  Mol.
        accepted = []
        for nodes in mol.FindSmarts(group.smarts):
            try:
                focal_nodes = set(group.FilterFocalSet(nodes))
            except IndexError as e:
                logging.error(
                    "Focal set for group %s is out of range: %s"
                    % (str(group), str(group.focal_atoms))
                )
                raise e

            # Only keep focal sets that don't override an already
            # assigned node
            if _claim(focal_nodes):
                accepted.append(focal_nodes)
        groups.append((group, accepted))

    # Ignore the hydrogen atoms when checking which atom is unassigned
    for hydrogen_match in mol.FindSmarts("[H]"):
        free_atoms -= set(hydrogen_match)

    decomposition = GroupDecomposition(
        self.groups_data, mol, groups, free_atoms
    )

    if raise_exception and decomposition.unassigned_nodes:
        raise GroupDecompositionError(
            f"Unable to decompose {mol} into groups", decomposition
        )

    return decomposition
def FindPhosphateChains(
    mol: Molecule, max_length: int = 4, ignore_protonations: bool = False
) -> List[Tuple[Group, List[Set[int]]]]:
    """Locate all the phosphate chains in a molecule.

    For every chain length from 1 up to 'max_length', three kinds of
    chains are matched in turn: ringed chains, internal chains (ones in the
    middle of the molecule) and terminal chains. Each match is cut into
    phosphate groups from its front, Mg2+ ions are attached to the groups
    found, and the result is merged into the overall group map.

    Chain end should be 'OC' for chains that do not really end, but link
    to carbons. Chain end should be '[O-1,OH]' for chains that end in a
    hydroxyl.

    :param mol: the molecule to decompose.
    :param max_length: the maximum length of a phosphate chain to consider.
    :param ignore_protonations: whether or not to ignore protonation
        values.
    :return: A list of 2-tuples (phosphate group, list of occurrences),
        one per entry of GroupsData.PHOSPHATE_GROUPS.
    :raises GroupDecompositionError: if protonations are not ignored and a
        group is found at a protonation level that is not in the chain map.
    """
    group_map = {pg: [] for pg in GroupsData.PHOSPHATE_GROUPS}
    v_charge = [atom.GetFormalCharge() for atom in mol.GetAtoms()]
    assigned_mgs = set()

    def take_front(pchain, p_size):
        """Remove the first 'p_size' atoms; return them and their charge."""
        if p_size > len(pchain):
            raise Exception(
                "trying to pop more atoms than are left in the pchain"
            )
        head = pchain[:p_size]
        head_charge = sum(v_charge[idx] for idx in head)
        del pchain[:p_size]
        return set(head), head_charge

    def record(chain_map, group_name, charge, atoms):
        """File an occurrence under the group matching its protonation."""
        default = GroupsData.DEFAULTS[group_name]
        if ignore_protonations:
            chain_map[default].append(atoms)
            return

        # NOTE(flamholz): We rely on the default number of
        # magnesiums being 0 (which it is).
        hydrogens = default.hydrogens + charge - default.charge
        group = Group(group_name, hydrogens, charge, default.nMg)
        if group not in chain_map:
            # This protonation level is not allowed for this group name;
            # falling back to the default protonation level is
            # deliberately not done here.
            raise GroupDecompositionError(
                f"The group {group_name} cannot have nH = {hydrogens}"
            )
        chain_map[group].append(atoms)

    def commit(chain_map):
        """Attach Mg2+ ions, then merge the chain map into the group map."""
        nonlocal assigned_mgs
        assigned_mgs = GroupDecomposer.AttachMgToPhosphateChain(
            mol, chain_map, assigned_mgs
        )
        GroupDecomposer.UpdateGroupMapFromChain(group_map, chain_map)

    def scan_linked_chains(
        smarts_str, is_odd, odd_head, even_head, repeat_unit
    ):
        """Handle chains that have a carbon on both of their ends."""
        chain_map = {key: [] for key in group_map}
        for match in mol.FindSmarts(smarts_str):
            remaining = list(match)
            remaining.pop()  # Lose the last carbon
            remaining.pop(0)  # Lose the first carbon

            # The leading group depends on the parity of the chain length.
            if is_odd:
                head_size, head_name = 5, odd_head
            else:
                head_size, head_name = 9, even_head
            atoms, charge = take_front(remaining, head_size)
            record(chain_map, head_name, charge, atoms)

            # Whatever is left is consumed in units of 8 atoms.
            while remaining:
                atoms, charge = take_front(remaining, 8)
                record(chain_map, repeat_unit, charge, atoms)
        commit(chain_map)

    # For each allowed length
    for length in range(1, max_length + 1):
        is_odd = length % 2 == 1

        # Find ringed phosphate chains.
        scan_linked_chains(
            GroupDecomposer._RingedPChainSmarts(length),
            is_odd,
            "ring -OPO3-",
            "ring -OPO3-OPO2-",
            "ring -OPO2-OPO2-",
        )

        # Find internal phosphate chains (ones in the middle of the
        # molecule).
        scan_linked_chains(
            GroupDecomposer._InternalPChainSmarts(length),
            is_odd,
            "-OPO3-",
            "-OPO3-OPO2-",
            "-OPO2-OPO2-",
        )

        # Find terminal phosphate chains.
        smarts_str = GroupDecomposer._TerminalPChainSmarts(length)
        chain_map = {key: [] for key in group_map}
        for match in mol.FindSmarts(smarts_str):
            remaining = list(match)
            remaining.pop()  # Lose the carbon

            atoms, charge = take_front(remaining, 5)
            record(chain_map, "-OPO3", charge, atoms)

            if not is_odd:
                atoms, charge = take_front(remaining, 4)
                record(chain_map, "-OPO2-", charge, atoms)

            while remaining:
                atoms, charge = take_front(remaining, 8)
                record(chain_map, "-OPO2-OPO2-", charge, atoms)
        commit(chain_map)

    return [(pg, group_map[pg]) for pg in GroupsData.PHOSPHATE_GROUPS]