예제 #1
0
파일: utils.py 프로젝트: zie225/2019-nCov
def to_graph(smiles, dataset):
    """Convert a SMILES string into per-atom node encodings and typed edges.

    The molecule is parsed with RDKit, kekulized when ``need_kekulize`` asks
    for it, and stripped of stereochemistry before bonds and atoms are read.

    Args:
        smiles: SMILES string to parse.
        dataset: Dataset name. ``'qm9'`` and ``'cep'`` encode each atom by its
            element symbol; ``'zinc'`` encodes it as
            ``"<atom_symbol><valence>(<charge>)"``. For any other name no atom
            is encoded and the returned ``nodes`` list is empty.

    Returns:
        A ``(nodes, edges)`` pair. ``nodes`` holds one ``onehot(...)`` result
        per atom; ``edges`` holds ``(begin_atom_idx, bond_type, end_atom_idx)``
        tuples with ``bond_type`` taken from ``bond_dict``. ``([], [])`` is
        returned when the SMILES cannot be parsed or, for ``'zinc'``, when an
        atom type is missing from the dataset's ``atom_types``.

    Raises:
        AssertionError: If a bond maps to type ``3`` in ``bond_dict``.

    Note:
        For ``'qm9'``/``'cep'`` an unknown element symbol is not pre-checked,
        so the ``.index`` lookup fails (``ValueError`` if ``atom_types`` is a
        list).
    """
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        return [], []
    # Kekulize works in place and never rebinds ``mol``, so there is nothing
    # to re-check for None afterwards.
    if need_kekulize(mol):
        rdmolops.Kekulize(mol)
    # remove stereo information, such as inward and outward edges
    Chem.RemoveStereochemistry(mol)

    edges = []
    for bond in mol.GetBonds():
        bond_type = bond_dict[str(bond.GetBondType())]
        edges.append((bond.GetBeginAtomIdx(), bond_type, bond.GetEndAtomIdx()))
        assert bond_type != 3

    nodes = []
    by_symbol = dataset == 'qm9' or dataset == "cep"
    by_valence = dataset == 'zinc'
    if not (by_symbol or by_valence):
        # Unknown dataset names never encode atoms; skip the vocabulary lookup.
        return nodes, edges

    # The vocabulary does not change between atoms: look it up once.
    atom_types = dataset_info(dataset)['atom_types']
    num_types = len(atom_types)
    for atom in mol.GetAtoms():
        if by_symbol:
            atom_key = atom.GetSymbol()
        else:
            # transform using "<atom_symbol><valence>(<charge>)" notation
            atom_key = "%s%i(%i)" % (atom.GetSymbol(),
                                     atom.GetTotalValence(),
                                     atom.GetFormalCharge())
            if atom_key not in atom_types:
                print('unrecognized atom type %s' % atom_key)
                return [], []
        nodes.append(onehot(atom_types.index(atom_key), num_types))

    return nodes, edges
    def _build_drug_graph(self, smiles):
        """Build a molecular graph from a SMILES string.

        TODO: cite the original source this routine was adapted from.

        The molecule is parsed with RDKit, kekulized when
        ``self.need_kekulize`` asks for it, and stripped of stereochemistry
        before bonds and atoms are read.

        Args:
            smiles: SMILES string to parse.

        Returns:
            A ``(nodes, edges, mol)`` triple. ``nodes`` is a float tensor
            built from one ``onehot(...)`` result per atom (indexed by element
            symbol in ``self.dataset_info['atom_types']``); ``edges`` is a
            tensor built from ``(begin_atom_idx, bond_type, end_atom_idx)``
            tuples; ``mol`` is the processed RDKit molecule.

        Raises:
            ValueError: If RDKit cannot parse ``smiles``.
            AssertionError: If a bond maps to type ``3`` in ``self.bond_dict``.
        """
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            raise ValueError('Molecule construction failed on SMILES %s' %
                             smiles)
        # Kekulize works in place and never rebinds ``mol``, so there is
        # nothing to re-check for None afterwards.
        if self.need_kekulize(mol):
            rdmolops.Kekulize(mol)
        # remove stereo information, such as inward and outward edges
        Chem.RemoveStereochemistry(mol)

        edges = []
        for bond in mol.GetBonds():
            bond_type = self.bond_dict[str(bond.GetBondType())]
            edges.append((bond.GetBeginAtomIdx(),
                          bond_type,
                          bond.GetEndAtomIdx()))
            assert bond_type != 3

        # The vocabulary does not change between atoms: look it up once.
        atom_types = self.dataset_info['atom_types']
        num_types = len(atom_types)
        nodes = [onehot(atom_types.index(atom.GetSymbol()), num_types)
                 for atom in mol.GetAtoms()]

        nodes = torch.tensor(nodes).float()
        edges = torch.tensor(edges)

        return nodes, edges, mol
예제 #3
0
def to_graph(smiles, dataset):
    """Convert a SMILES string into node encodings and typed edges, with Hs.

    The molecule is parsed with RDKit, explicit hydrogens are added, it is
    kekulized when ``need_kekulize`` asks for it, and stereochemistry is
    removed before bonds and atoms are read. Each atom is encoded from the
    string ``"<atom_symbol><valence>(<charge>)"``.

    Args:
        smiles: SMILES string to parse.
        dataset: Dataset name passed to ``dataset_info`` to obtain the
            ``'atom_types'`` vocabulary.

    Returns:
        A ``(nodes, edges)`` pair. ``nodes`` holds one ``onehot(...)`` result
        per atom; ``edges`` holds ``(begin_atom_idx, bond_type, end_atom_idx)``
        tuples with ``bond_type`` taken from ``bond_dict``. ``([], [])`` is
        returned when the SMILES cannot be parsed or when an atom type is
        missing from the vocabulary.
    """
    mol = Chem.MolFromSmiles(smiles)
    # Check the parse result *before* AddHs: passing None to AddHs raises
    # instead of letting us return the empty graph.
    if mol is None:
        return [], []
    mol = Chem.AddHs(mol)
    # Kekulize works in place and never rebinds ``mol``, so there is nothing
    # to re-check for None afterwards.
    if need_kekulize(mol):
        rdmolops.Kekulize(mol)
    Chem.RemoveStereochemistry(mol)

    edges = [
        (bond.GetBeginAtomIdx(), bond_dict[str(bond.GetBondType())],
         bond.GetEndAtomIdx())
        for bond in mol.GetBonds()
    ]

    # The vocabulary does not change between atoms: look it up once.
    atom_types = dataset_info(dataset)['atom_types']
    num_types = len(atom_types)
    nodes = []
    for atom in mol.GetAtoms():
        atom_str = "%s%i(%i)" % (atom.GetSymbol(),
                                 atom.GetTotalValence(),
                                 atom.GetFormalCharge())
        if atom_str not in atom_types:
            return [], []
        nodes.append(onehot(atom_types.index(atom_str), num_types))
    return nodes, edges
예제 #4
0
    def prepare_mol(self, mol: rdchem.Mol) -> Tuple[str, rdchem.Mol]:
        """Standardize a molecule and return it with its canonical SMILES.

        The input is written out as canonical SMILES and re-parsed from that
        string; hydrogens are then added when ``self.add_Hs`` is set and the
        result is kekulized in place when ``self.kekulize`` is set.

        This method should be called before `get_input_feats`.

        Params:
        -------
        mol: rdkit.Chem.rdchem.Mol
            Molecule of interest.

        Returns:
        --------
        canonical_smiles: str
            Canonical SMILES produced from the input molecule.

        mol: rdkit.Chem.rdchem.Mol
            Molecule rebuilt from the canonical SMILES, with Hs added and/or
            kekulized according to the instance settings.
        """
        smiles_text = rdmolfiles.MolToSmiles(mol, canonical=True)
        # Round-trip through SMILES so the returned molecule matches the
        # canonical string.
        rebuilt = rdmolfiles.MolFromSmiles(smiles_text)

        if self.add_Hs:
            rebuilt = rdmolops.AddHs(rebuilt)
        if self.kekulize:
            # Kekulize modifies the molecule in place.
            rdmolops.Kekulize(rebuilt)

        return smiles_text, rebuilt
예제 #5
0
def to_graph(mol, dataset):
    """Convert an RDKit molecule into per-atom node encodings and typed edges.

    The molecule is kekulized in place when ``need_kekulize`` asks for it and
    its stereochemistry is removed before bonds and atoms are read, so the
    caller's ``mol`` object is modified.

    Args:
        mol: RDKit molecule, or ``None``.
        dataset: Dataset name passed to ``get_atom_types`` to obtain the atom
            vocabulary.

    Returns:
        A ``(nodes, edges)`` pair. ``nodes`` holds one ``onehot(...)`` result
        per atom, indexed by element symbol in the vocabulary; ``edges`` holds
        ``(begin_atom_idx, bond_type, end_atom_idx)`` tuples with
        ``bond_type`` taken from ``bond_dict``. ``([], [])`` is returned when
        ``mol`` is ``None``.

    Raises:
        AssertionError: If a bond maps to type ``3`` in ``bond_dict``.
    """
    if mol is None:
        return [], []
    # Kekulize works in place and never rebinds ``mol``, so there is nothing
    # to re-check for None afterwards.
    if need_kekulize(mol):
        rdmolops.Kekulize(mol)
    Chem.RemoveStereochemistry(mol)

    atom_types = get_atom_types(dataset)

    edges = []
    for bond in mol.GetBonds():
        bond_type = bond_dict[str(bond.GetBondType())]
        edges.append(
            (bond.GetBeginAtomIdx(), bond_type, bond.GetEndAtomIdx()))
        assert bond_type != 3

    num_types = len(atom_types)
    nodes = [
        onehot(atom_types.index(str(atom.GetSymbol())), num_types)
        for atom in mol.GetAtoms()
    ]
    return nodes, edges
        if test_mol != None:
            energy = get_BO_energy(raw_mol)
            if smiles not in smiles_list and energy_of_reactant - energy < E_cutoff:
                smiles_list.append(smiles)
                molecules.append(raw_mol)

    smiles_list.insert(0, Chem.MolToSmiles(mol))
    molecules.insert(0, mol)

    return smiles_list, molecules


if __name__ == "__main__":
    # Demo driver: run one elementary step on each input molecule and print
    # how many product SMILES were produced, followed by the list itself.
    # Other inputs previously tried here: ['CC', 'C=C', 'C#C'] and
    # ['C=C.C=CC=C'].
    smiles_list = ['C(=O)O']

    heterolytic = False
    E_cutoff = 200

    for smiles in smiles_list:
        mol = Chem.MolFromSmiles(smiles)
        rdmolops.Kekulize(mol, clearAromaticFlags=True)
        # Read the formal charge before adding explicit hydrogens.
        charge = Chem.GetFormalCharge(mol)
        mol = Chem.AddHs(mol)
        elementary_smiles, elementary_mols = take_elementary_step(
            mol, charge, E_cutoff, heterolytic)

        # Single-argument print calls produce the same output on Python 2
        # and Python 3.
        print("len(elementary_smiles) %d" % len(elementary_smiles))
        print(elementary_smiles)