Beispiel #1
0
    def AttachMgToPhosphateChain(
        mol: Molecule, chain_map: Dict[Group, list], assigned_mgs: Set[int]
    ) -> Set[int]:
        """Attach free Mg2+ ions to phosphate groups of the current chain.

        Every ``[Mg+2]`` match in ``mol`` whose atom index is not yet in
        ``assigned_mgs`` is given to the first candidate group that still has
        an occurrence in ``chain_map``. Candidates are tried in the order
        ``FINAL_PHOSPHATES_TO_MGS``, ``MIDDLE_PHOSPHATES_TO_MGS``,
        ``RING_PHOSPHATES_TO_MGS``. The chosen occurrence is removed from the
        plain group's list, gets the Mg atom index added to its node set, and
        is appended to the list of the matching Mg-bound group. An ion for
        which no candidate group has an occurrence left stays unassigned.

        :param mol: the molecule.
        :param chain_map: maps each group to its list of node sets; it is
            modified in place.
        :param assigned_mgs: atom indices of the Mg2+ ions that are already
            assigned; it is modified in place.
        :return: the same ``assigned_mgs`` set, including the ions assigned
            by this call.
        """
        candidate_pairs = (
            GroupsData.FINAL_PHOSPHATES_TO_MGS
            + GroupsData.MIDDLE_PHOSPHATES_TO_MGS
            + GroupsData.RING_PHOSPHATES_TO_MGS
        )

        for mg_match in mol.FindSmarts("[Mg+2]"):
            mg_atom = mg_match[0]
            if mg_atom in assigned_mgs:
                continue

            for plain_group, bound_group in candidate_pairs:
                occurrences = chain_map[plain_group]
                if not occurrences:
                    continue

                # Move the oldest occurrence of the plain group over to its
                # Mg-bound counterpart, now including the Mg atom itself.
                node_set = occurrences.pop(0)
                node_set.add(mg_atom)
                assigned_mgs.add(mg_atom)
                chain_map[bound_group].append(node_set)
                break

        return assigned_mgs
Beispiel #2
0
    def Decompose(
        self,
        mol: Molecule,
        ignore_protonations: bool = False,
        raise_exception: bool = False,
    ) -> GroupDecomposition:
        """Split a molecule into the groups listed in ``self.groups_data``.

        Groups are tried in the order in which they appear in
        ``self.groups_data.groups``. An occurrence is accepted only when all
        of its focal atoms are still unassigned; accepted atoms are then
        removed from the pool of unassigned atoms.

        The flag 'ignore_protonations' should be used when decomposing a
        compound whose representation is lacking protonation information
        (for example, the KEGG database doesn't possess it). If this flag is
        set to True, it overrides the '(C)harge sensitive' flag in the
        groups file (i.e. - *PC).

        :param mol: the molecule to decompose.
        :param ignore_protonations: whether to ignore protonation levels.
        :param raise_exception: whether to raise an error when the resulting
            decomposition still reports unassigned atoms.

        :return: A GroupDecomposition object containing the decomposition.
        :raises MalformedGroupDefinitionError: if a phosphate group neither
            ignores charges nor is charge sensitive (and
            ``ignore_protonations`` is False).
        :raises GroupDecompositionError: if ``raise_exception`` is True and
            the decomposition has unassigned nodes.
        """
        free_nodes = set(range(len(mol)))
        matched_groups: List[Tuple[Group, List[Set[int]]]] = []

        for group in self.groups_data.groups:
            if group.IsPhosphate():
                # Phosphate chains are located by a dedicated routine; the
                # group definition only selects how protonation is handled.
                if group.IgnoreCharges() or ignore_protonations:
                    charge_blind = True
                elif group.ChargeSensitive():
                    charge_blind = False
                else:
                    raise MalformedGroupDefinitionError(
                        "Unrecognized phosphate wildcard: %s" % group.name
                    )

                chain_groups = self.FindPhosphateChains(
                    mol, ignore_protonations=charge_blind
                )
                for phosphate_group, node_sets in chain_groups:
                    accepted = []
                    for candidate in node_sets:
                        # Skip occurrences that would override an atom that
                        # has already been assigned.
                        if not candidate.issubset(free_nodes):
                            continue
                        accepted.append(candidate)
                        free_nodes -= candidate
                    matched_groups.append((phosphate_group, accepted))
                continue

            if group.IsCodedCorrection():
                # Corrections own no atoms: record one empty set per count.
                placeholders = [set() for _ in range(group.GetCorrection(mol))]
                matched_groups.append((group, placeholders))
                continue

            # Neither a phosphate group nor a coded correction.
            # TODO: if the 'ignore_protonation' flag is True,
            #  this should always use the pseudogroup with the lowest nH
            #  in each category regardless of the hydrogens in the given
            #  Mol.
            accepted = []
            for match in mol.FindSmarts(group.smarts):
                try:
                    focal_nodes = set(group.FilterFocalSet(match))
                except IndexError:
                    logging.error(
                        "Focal set for group %s is out of range: %s"
                        % (group, group.focal_atoms)
                    )
                    raise

                # Skip occurrences that would override an atom that has
                # already been assigned.
                if not focal_nodes.issubset(free_nodes):
                    continue
                accepted.append(focal_nodes)
                free_nodes -= focal_nodes
            matched_groups.append((group, accepted))

        # Hydrogen atoms do not count as unassigned.
        for hydrogen_match in mol.FindSmarts("[H]"):
            free_nodes -= set(hydrogen_match)

        decomposition = GroupDecomposition(
            self.groups_data, mol, matched_groups, free_nodes
        )

        if raise_exception and decomposition.unassigned_nodes:
            raise GroupDecompositionError(
                f"Unable to decompose {mol} into groups", decomposition
            )

        return decomposition
Beispiel #3
0
    def FindPhosphateChains(
        mol: Molecule, max_length: int = 4, ignore_protonations: bool = False
    ) -> List[Tuple[Group, List[Set[int]]]]:
        """Find all phosphate chains.

        For every chain length from 1 to ``max_length`` three kinds of chain
        are searched, in this order: ringed, internal and terminal. Each
        match is cut into fixed-size pieces, every piece is recorded under a
        phosphate group, and free Mg2+ ions are attached after each search.

        Chain end should be 'OC' for chains that do not really end, but link
        to carbons. Chain end should be '[O-1,OH]' for chains that end in a
        hydroxyl.

        :param mol: the molecule to decompose.
        :param max_length: the maximum length of a phosphate chain to consider.
        :param ignore_protonations: whether or not to ignore protonation values.

        :return: A list of 2-tuples (phosphate group, list of occurrences),
            one per entry of ``GroupsData.PHOSPHATE_GROUPS``.
        :raises Exception: if a match holds fewer atoms than the piece that
            is about to be cut from it.
        :raises GroupDecompositionError: if protonations are not ignored and
            a piece has a protonation level with no corresponding group.
        """
        group_map = {pg: [] for pg in GroupsData.PHOSPHATE_GROUPS}
        atom_charges = [atom.GetFormalCharge() for atom in mol.GetAtoms()]
        assigned_mgs = set()

        def claim(
            chain_map: Dict[Group, list],
            pchain: List[int],
            size: int,
            group_name: str,
        ) -> None:
            """Cut `size` atoms off the front of `pchain` and record them."""
            if size > len(pchain):
                raise Exception(
                    "trying to pop more atoms than are left in " "the pchain"
                )
            members = pchain[:size]
            charge = sum(atom_charges[i] for i in members)
            del pchain[:size]
            atoms = set(members)

            default = GroupsData.DEFAULTS[group_name]
            if ignore_protonations:
                chain_map[default].append(atoms)
                return

            # NOTE(flamholz): We rely on the default number of
            # magnesiums being 0 (which it is).
            hydrogens = default.hydrogens + charge - default.charge
            group = Group(group_name, hydrogens, charge, default.nMg)
            if group in chain_map:
                chain_map[group].append(atoms)
                return

            # A protonation level without a matching group is an error; we
            # do not fall back to the default protonation level.
            raise GroupDecompositionError(
                f"The group {group_name} cannot have nH = {hydrogens}"
            )

        # For each allowed length
        for length in range(1, max_length + 1):
            # The leading piece(s) cut from a match depend on the parity of
            # the chain length; whatever is left is cut into 8-atom pieces.
            if length % 2 == 0:
                ring_head = ((9, "ring -OPO3-OPO2-"),)
                internal_head = ((9, "-OPO3-OPO2-"),)
                terminal_head = ((5, "-OPO3"), (4, "-OPO2-"))
            else:
                ring_head = ((5, "ring -OPO3-"),)
                internal_head = ((5, "-OPO3-"),)
                terminal_head = ((5, "-OPO3"),)

            # (SMARTS builder, drop the first atom too?, leading pieces,
            #  group name for the remaining 8-atom pieces)
            passes = (
                # Ringed phosphate chains.
                (
                    GroupDecomposer._RingedPChainSmarts,
                    True,
                    ring_head,
                    "ring -OPO2-OPO2-",
                ),
                # Internal phosphate chains (ones in the middle of the
                # molecule).
                (
                    GroupDecomposer._InternalPChainSmarts,
                    True,
                    internal_head,
                    "-OPO2-OPO2-",
                ),
                # Terminal phosphate chains.
                (
                    GroupDecomposer._TerminalPChainSmarts,
                    False,
                    terminal_head,
                    "-OPO2-OPO2-",
                ),
            )

            for build_smarts, drop_first, head_pieces, tail_name in passes:
                smarts_str = build_smarts(length)
                chain_map = {key: [] for key in group_map}

                for match in mol.FindSmarts(smarts_str):
                    remaining = list(match)
                    remaining.pop()  # Lose the last carbon
                    if drop_first:
                        remaining.pop(0)  # Lose the first carbon

                    for piece_size, piece_name in head_pieces:
                        claim(chain_map, remaining, piece_size, piece_name)

                    while remaining:
                        claim(chain_map, remaining, 8, tail_name)

                assigned_mgs = GroupDecomposer.AttachMgToPhosphateChain(
                    mol, chain_map, assigned_mgs
                )
                GroupDecomposer.UpdateGroupMapFromChain(group_map, chain_map)

        return [(pg, group_map[pg]) for pg in GroupsData.PHOSPHATE_GROUPS]