예제 #1
0
 def __init__(
     self,
     donor="[#7,#8,#16][H]",
     acceptor="[N,O,F,-{1-};!+{1-}]",
     distance=3.5,
     angles=(130, 180),
 ):
     """Store the donor/acceptor queries and the geometric cutoffs.

     Parameters
     ----------
     donor : str
         SMARTS string compiled with ``MolFromSmarts`` into ``self.donor``.
     acceptor : str
         SMARTS string compiled with ``MolFromSmarts`` into ``self.acceptor``.
     distance : float
         Distance cutoff, stored unchanged.
     angles : iterable of numbers
         Angle bounds; each value is passed through ``radians`` before being
         stored (so they are presumably given in degrees).
     """
     self.donor = MolFromSmarts(donor)
     self.acceptor = MolFromSmarts(acceptor)
     self.distance = distance
     self.angles = tuple(map(radians, angles))
예제 #2
0
 def __init__(
     self,
     cation="[+{1-}]",
     pi_ring=("a1:a:a:a:a:a:1", "a1:a:a:a:a:1"),
     distance=4.5,
     angles=(0, 30),
 ):
     """Store the cation / aromatic-ring queries and the geometric cutoffs.

     Parameters
     ----------
     cation : str
         SMARTS string compiled with ``MolFromSmarts`` into ``self.cation``.
     pi_ring : iterable of str
         SMARTS strings; each one is compiled and the results are kept, in
         order, as the list ``self.pi_ring``.
     distance : float
         Distance cutoff, stored unchanged.
     angles : iterable of numbers
         Angle bounds; each value is passed through ``radians`` before being
         stored (so they are presumably given in degrees).
     """
     self.cation = MolFromSmarts(cation)
     self.pi_ring = list(map(MolFromSmarts, pi_ring))
     self.distance = distance
     self.angles = tuple(map(radians, angles))
예제 #3
0
 def __init__(
     self,
     donor="[#6,#7,Si,F,Cl,Br,I]-[Cl,Br,I,At]",
     acceptor="[#7,#8,P,S,Se,Te,a;!+{1-}][*]",
     distance=3.5,
     axd_angles=(130, 180),
     xar_angles=(80, 140),
 ):
     """Store the donor/acceptor queries and the two angle windows.

     Parameters
     ----------
     donor : str
         SMARTS string compiled with ``MolFromSmarts`` into ``self.donor``.
     acceptor : str
         SMARTS string compiled with ``MolFromSmarts`` into ``self.acceptor``.
     distance : float
         Distance cutoff, stored unchanged.
     axd_angles, xar_angles : iterable of numbers
         Angle bounds; every value is passed through ``radians`` before being
         stored (so they are presumably given in degrees).
     """
     self.donor = MolFromSmarts(donor)
     self.acceptor = MolFromSmarts(acceptor)
     self.distance = distance
     self.axd_angles = tuple(map(radians, axd_angles))
     self.xar_angles = tuple(map(radians, xar_angles))
예제 #4
0
 def _rules_molvs():
     """Build the SMARTS queries used to neutralize compounds (inspired by molvs).

     Returns
     -------
     dict
         ``"pos_h"`` maps to the query for positively charged atoms that carry
         at least one hydrogen; ``"neg"`` maps to the query for negatively
         charged atoms.
     """
     # Stricter molvs-style variants, kept for reference only (not used):
     #   pos_h    : '[+!H0!$(*~[-])]'   neutralizable positive charge
     #   pos_quat : '[+H0!$(*~[-])]'    positive charge without hydrogens
     #   neg      : '[-!$(*~[+H0])]'    negative charge not bonded to a
     #                                  hydrogen-free positive charge
     #   neg_acid : '[$([O-][C,P,S]=O),$([n-]1nnnc1),$(n1[n-]nnc1)]'
     return {
         "pos_h": MolFromSmarts('[+!H0]'),
         "neg": MolFromSmarts('[-]'),
     }
예제 #5
0
파일: removehs.py 프로젝트: rhara/plifinder
def _atom_matches_smarts(atom, smarts):
    """Return True if ``atom`` is part of any match of ``smarts`` in its molecule.

    The SMARTS string is compiled on every call and searched against the
    molecule that owns ``atom``; the atom's index only has to appear somewhere
    in a match tuple, not at a particular position.
    """
    atom_index = atom.GetIdx()
    query = MolFromSmarts(smarts)
    hits = atom.GetOwningMol().GetSubstructMatches(query)
    return any(atom_index in hit for hit in hits)
예제 #6
0
def get_MCSs(test_mols, known_mols, nns_indices=None, murcko_scaff=False):
    """For each query molecule, find the known molecule sharing the largest MCS.

    Parameters
    ----------
    test_mols : sequence
        Query molecules.
    known_mols : sequence
        Reference molecules; converted to a NumPy array so it can be indexed
        with the entries of ``nns_indices``.
    nns_indices : sequence, optional
        One index collection per query selecting the reference molecules to
        compare against. Defaults to all reference molecules for every query.
    murcko_scaff : bool
        If true, ``GetScaffoldForMol`` is handed to ``get_mcs`` as the
        molecule transform; otherwise the identity function is used.

    Returns
    -------
    tuple of four lists, one entry per query
        The best MCS result, the query's match of that MCS, the selected
        reference molecule, and the reference molecule's match of that MCS.
    """
    def _to_scaffold(mol):
        return GetScaffoldForMol(mol)

    def _unchanged(mol):
        return mol

    if nns_indices is None:
        nns_indices = [np.arange(len(known_mols))] * len(test_mols)
    transform = _to_scaffold if murcko_scaff else _unchanged
    known_mols = np.array(known_mols)

    MCSs = []
    MCS_matches = []
    NN_mols = []
    NN_MCS_matches = []

    for query_mol, neighbour_idx in zip(test_mols, nns_indices):
        candidates = known_mols[neighbour_idx]
        candidate_mcs = [
            get_mcs(query_mol, candidate, transform)
            for candidate in tqdm(candidates)
        ]
        # "Best" means the MCS with the most atoms; the first one wins ties.
        best = np.argmax([result.numAtoms for result in candidate_mcs])
        NN_mol = candidates[best]
        mcs = candidate_mcs[best]

        mcs_query = MolFromSmarts(mcs.smartsString)
        NN_mol_match = NN_mol.GetSubstructMatch(mcs_query)
        query_mol_match = query_mol.GetSubstructMatch(mcs_query)

        MCSs.append(mcs)
        MCS_matches.append(query_mol_match)
        NN_mols.append(NN_mol)
        NN_MCS_matches.append(NN_mol_match)

    return MCSs, MCS_matches, NN_mols, NN_MCS_matches
예제 #7
0
 def __init__(
     self,
     centroid_distance=6.0,
     shortest_distance=3.8,
     plane_angles=(0, 90),
     pi_ring=("a1:a:a:a:a:a:1", "a1:a:a:a:a:1"),
 ):
     """Store the aromatic-ring queries and the geometric cutoffs.

     Parameters
     ----------
     centroid_distance : float
         Stored unchanged as ``self.centroid_distance``.
     shortest_distance : float
         Stored *squared* as ``self.shortest_distance``.
         NOTE(review): only this cutoff is squared, presumably because it is
         compared against squared distances elsewhere - confirm at the use
         site.
     plane_angles : iterable of numbers
         Angle bounds; each value is passed through ``radians`` before being
         stored (so they are presumably given in degrees).
     pi_ring : iterable of str
         SMARTS strings; each one is compiled and the results are kept, in
         order, as the list ``self.pi_ring``.
     """
     self.pi_ring = list(map(MolFromSmarts, pi_ring))
     self.centroid_distance = centroid_distance
     self.shortest_distance = shortest_distance ** 2
     self.plane_angles = tuple(map(radians, plane_angles))
예제 #8
0
    def _parse_smarts_keys(smarts_keys: list) -> list:
        olist = []

        for key in smarts_keys:
            olist.append(MolFromSmarts(key))
            if not olist[-1]:
                print("Could not parse SMARTS '%s'." % key)

        return olist
예제 #9
0
def check_similarity_between_generic_and_complete_representation(
        generic_smiles, complete_smiles):
    """Return True if the generic structure is a substructure of the complete one.

    Both inputs are first passed through ``NeutraliseCharges``. The complete
    representation is then parsed as SMILES, the generic one is parsed as a
    SMARTS query, and the query is matched against the complete molecule.
    """
    neutral_complete, _ = NeutraliseCharges(complete_smiles)
    neutral_generic, _ = NeutraliseCharges(generic_smiles)
    target = MolFromSmiles(neutral_complete)
    query = MolFromSmarts(neutral_generic)
    # GetSubstructMatch yields an empty result when nothing matches.
    return bool(target.GetSubstructMatch(query))
예제 #10
0
    def matches(self, smarts):
        """
        Check whether the RDMol matches a SMARTS query. Uses a client-side
        RDKit installation.

        Parameters
        ----------
        smarts : str
            SMARTS query; compiled with ``MolFromSmarts`` on every call.

        Returns
        -------
        matches : boolean
            True if the rdmol molecule attribute matches the given SMARTS query.
        """
        query = MolFromSmarts(smarts)
        return self.rdmol.HasSubstructMatch(query)
예제 #11
0
def klekota_roth(mols):
    """Compute count-based Klekota-Roth substructure fingerprints for ``mols``.

    For every molecule the fingerprinter receives one value per entry of
    ``KRFP_SMARTS``: the number of substructure matches of that SMARTS.

    Returns
    -------
    tuple
        ``(fingerprints, header)`` where ``fingerprints`` is whatever
        ``_rdkit_dense_fingerprinter`` returns and ``header`` is the result of
        ``_zfill_dense_header`` applied to the stringified column indices
        ``"0" .. str(len(KRFP_SMARTS) - 1)``.
    """
    from ._krfp_smarts import KRFP_SMARTS

    # Compile each SMARTS once instead of once per molecule per pattern.
    patterns = [MolFromSmarts(smarts) for smarts in KRFP_SMARTS]

    def _count_matches(mol):
        return [len(mol.GetSubstructMatches(patt)) for patt in patterns]

    return (
        _rdkit_dense_fingerprinter(
            mols=mols,
            which=_count_matches,
        ),
        # ``np.str`` was only an alias of the builtin ``str``; it was
        # deprecated in NumPy 1.20 and removed in 1.24, so use ``str`` directly.
        _zfill_dense_header(np.array(range(len(KRFP_SMARTS)), dtype=str)),
    )
예제 #12
0
class OriginalRule03(ScaffoldMinFilterRule):
    """Choose the parent scaffold with the smallest number of acyclic linker bonds"""

    # Two atoms joined by a bond that is not in a ring, not double and not
    # triple. Compiled once at class-definition time and shared by instances.
    acyc_linker_smarts = MolFromSmarts('*!@!=!#*')

    @property
    def name(self):
        """Human-readable rule name."""
        return 'original rule 03'

    def get_property(self, child, parent):
        """Return the number of linker-pattern matches in the parent scaffold.

        ``child`` is accepted for interface compatibility but not used.
        """
        linker_bonds = parent.mol.GetSubstructMatches(self.acyc_linker_smarts)
        return len(linker_bonds)
예제 #13
0
파일: removehs.py 프로젝트: rhara/plifinder
def _amide_bond(bond):
    """Return True if ``bond`` is a C-N bond whose two atoms lie in a 'C(=O)-N' match.

    Bonds that do not join a carbon and a nitrogen are rejected before any
    substructure search is run.
    """
    begin = bond.GetBeginAtom()
    end = bond.GetEndAtom()
    if {begin.GetAtomicNum(), end.GetAtomicNum()} != {6, 7}:
        return False

    # https://github.com/rdkit/rdkit/blob/master/Data/FragmentDescriptors.csv
    amide = MolFromSmarts('C(=O)-N')
    begin_idx = begin.GetIdx()
    end_idx = end.GetIdx()
    hits = bond.GetOwningMol().GetSubstructMatches(amide)
    return any(begin_idx in hit and end_idx in hit for hit in hits)
예제 #14
0
 def _rules_rdkit():
     """Build (charged query, neutral replacement) pairs for neutralization.

     Returns
     -------
     list of tuple
         One ``(query, replacement)`` pair per rule, in the order listed
         below. The query is compiled from SMARTS; the replacement is parsed
         from SMILES with ``False`` as the second positional argument
         (presumably ``sanitize`` - confirm against the RDKit signature).
     """
     charged_to_neutral = (
         ('[n+;H]', 'n'),  # Imidazoles
         ('[N+;!H0]', 'N'),  # Amines
         ('[$([O-]);!$([O-][#7])]', 'O'),  # Carboxylic acids and alcohols
         ('[S-;X1]', 'S'),  # Thiols
         ('[$([N-;X2]S(=O)=O)]', 'N'),  # Sulfonamides
         ('[$([N-;X2][C,N]=C)]', 'N'),  # Enamines
         ('[n-]', '[nH]'),  # Tetrazoles
         ('[$([S-]=O)]', 'S'),  # Sulfoxides
         ('[$([N-]C=O)]', 'N'),  # Amides
     )
     return [
         (MolFromSmarts(charged), MolFromSmiles(neutral, False))
         for charged, neutral in charged_to_neutral
     ]
예제 #15
0
def OH_count(smile):
    '''
    Given a SMILES string, count the OH groups (SMARTS ``[OX2H]``) in the chemical.
    Inputs:
        - smile (str): SMILES of the molecule
    Outputs:
        - Count of OH groups in the molecule (int), or the string 'NaN' when
          the SMILES cannot be parsed or the matching fails
    '''
    try:
        mol = MolFromSmiles(smile)
        if mol is None:
            # The parser signals an unparsable SMILES by returning None.
            return 'NaN'
        hydroxyl = MolFromSmarts('[OX2H]')
        return len(mol.GetSubstructMatches(hydroxyl))
    except Exception:
        # Best-effort contract: any ordinary failure maps to the 'NaN'
        # sentinel, but KeyboardInterrupt/SystemExit are no longer swallowed.
        return 'NaN'
예제 #16
0
    def parse_sma(self, sma):
        """Return the compiled pattern for ``sma``, using ``self.pat`` as a cache.

        A cached pattern is returned directly. Otherwise the SMARTS is
        compiled; on success it is stored in ``self.pat`` (after evicting one
        entry via ``popitem()`` if the cache already holds ``self.maxsma``
        entries) and returned. ``None`` is returned, and nothing is cached,
        when the SMARTS cannot be compiled.
        """
        try:
            return self.pat[sma]
        except KeyError:
            pass

        compiled = MolFromSmarts(sma)
        if not compiled:
            return None

        if len(self.pat) >= self.maxsma:
            # Cache is full: drop one entry to make room for the new pattern.
            self.pat.popitem()
        self.pat[sma] = compiled
        return compiled
예제 #17
0
class OriginalRule12(ScaffoldFilterRule):
    """Remove rings first where the linker is attached to a ring hetero atom at either end of the linker"""

    # A ring atom followed by a bond that is neither a ring bond nor a double
    # bond, leading to any atom. Compiled once at class-definition time.
    connection_patt = MolFromSmarts('[R]!@!=*')

    @property
    def name(self):
        """Human-readable rule name."""
        return 'original rule 12'

    def condition(self, child, parent):
        """Return True if the removed ring has a non-C, non-H connection atom.

        The ring removed to obtain ``parent`` is looked up in ``child``; its
        atoms that appear as the first atom of a ``connection_patt`` match are
        checked for an atomic number other than 1 or 6.
        """
        removed_ring = child.rings[parent.removed_ring_idx]
        attachment_atoms = {
            match[0]
            for match in child.mol.GetSubstructMatches(self.connection_patt)
        }
        on_removed_ring = attachment_atoms.intersection(removed_ring.aix)
        return any(
            child.atoms[idx].GetAtomicNum() not in (1, 6)
            for idx in on_removed_ring
        )
예제 #18
0
def _mols2imageStream(mols, f, format, size, legend, highlightMatch=None):
    """Draw ``mols`` as a grid image and save it to ``f`` in ``format``.

    Parameters
    ----------
    mols : sequence
        Molecules to draw; must be non-empty because the first one is probed
        for the ``_drawingBondsWedged`` property.
    f : file-like or path
        Destination handed to the image's ``save``.
    format : str
        Image format handed to the image's ``save``.
    size : number
        Per-molecule cell size; cells are doubled and the image is fitted
        when ``size`` exceeds 150.
    legend
        Accepted but not used by this function.
    highlightMatch : str, optional
        SMARTS; when given, the atoms of its match in each molecule are
        highlighted.
    """
    highlights = None
    if highlightMatch:
        query = MolFromSmarts(highlightMatch)
        highlights = [mol.GetSubstructMatch(query) for mol in mols]

    # Kekulize unless the first molecule is flagged as having wedged bonds.
    kekulize = not mols[0].HasProp("_drawingBondsWedged")

    options = DrawingOptions()
    if size > 150:
        # Large cells: render at twice the size with scaled-up line and font
        # metrics, then let the grid fit the image.
        cell = (size * 2, size * 2)
        fit = True
        options.coordScale = 3
        options.bondLineWidth = 4.5 if kekulize else 3.6
        options.dblBondOffset = 0.6 if kekulize else 0.435
        options.atomLabelFontSize = 150 if kekulize else 60
    else:
        cell = (size, size)
        fit = False
        if kekulize:
            options.dblBondOffset = 0.4

    grid = Draw.MolsToGridImage(
        mols,
        molsPerRow=min(len(mols), 4),
        subImgSize=cell,
        kekulize=kekulize,
        highlightAtomLists=highlights,
        fitImage=fit,
        options=options,
    )
    grid.save(f, format)
예제 #19
0
class SCPNumLinkerBonds(_MinMaxScaffoldFilterRule):
    """Filter by the number of acyclic linker bonds in the parent scaffold.

    Use 'min' to prioritize scaffolds with the fewest acyclic linker bonds,
    or 'max' to prioritize those with the most.

    Parameters
    ----------
    min_max : {'min', 'max'}
        Selects the function used to prioritize scaffolds based on the
        returned property.

    """
    # Two atoms joined by a bond that is not in a ring, not double and not
    # triple. Compiled once at class-definition time and shared by instances.
    acyc_linker_smarts = MolFromSmarts('*!@!=!#*')

    def __init__(self, min_max):
        super().__init__(min_max)

    def get_property(self, child, parent):
        """Return the number of linker-pattern matches in the parent scaffold.

        ``child`` is accepted for interface compatibility but not used.
        """
        linker_bonds = parent.mol.GetSubstructMatches(self.acyc_linker_smarts)
        return len(linker_bonds)
예제 #20
0
        if mol is not None:
            strings.append(s)
            values.append(v)
    return strings, values


def duplicate_analysis(df, key_col, val_col):
    """Summarize duplicated keys in ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to analyse.
    key_col : label or list of labels
        Column(s) that define what counts as a duplicate.
    val_col : label
        Column whose spread within each duplicated key is reported.

    Returns
    -------
    per_dup : float
        Fraction of rows that repeat an earlier row's key (the first
        occurrence of each key is not counted).
    stds : pandas.Series
        Standard deviation of ``val_col`` per key, computed over every row
        whose key occurs more than once.
    """
    repeats = df.duplicated(subset=key_col, keep="first")
    in_duplicated_group = df.duplicated(subset=key_col, keep=False)
    per_dup = np.sum(repeats) / len(df)
    stds = df[in_duplicated_group].groupby(key_col)[val_col].std()
    return per_dup, stds


# Query for atoms to neutralize, compiled once at import time. It matches
# either a +1 atom that has at least one implicit hydrogen and no negatively
# charged neighbour, or a -1 atom with no positively charged neighbour.
PATTERN = MolFromSmarts("[+1!h0!$([*]~[-1,-2,-3,-4]),-1!$([*]~[+1,+2,+3,+4])]")


def neutralize_atoms(mol, pattern):
    """Neutralize, in place, every atom of ``mol`` matched by ``pattern``.

    For the first atom of each match the formal charge is set to 0 and the
    explicit hydrogen count is set to the previous total hydrogen count minus
    the previous charge, after which the atom's property cache is refreshed.

    Returns
    -------
    The same ``mol`` object that was passed in (modified in place).
    """
    matched_atoms = [hit[0] for hit in mol.GetSubstructMatches(pattern)]
    for atom_idx in matched_atoms:
        atom = mol.GetAtomWithIdx(atom_idx)
        charge = atom.GetFormalCharge()
        total_hs = atom.GetTotalNumHs()
        atom.SetFormalCharge(0)
        # Removing a +1 charge drops one H; removing a -1 charge adds one.
        atom.SetNumExplicitHs(total_hs - charge)
        atom.UpdatePropertyCache()
    return mol
예제 #21
0
def count_rings(mol):
    """Return the number of matches of the SMARTS ``c1ccccc1`` in ``mol``.

    Only this six-membered aromatic carbon ring pattern is counted; other
    ring types do not contribute.
    """
    benzene_query = MolFromSmarts('c1ccccc1')
    return len(mol.GetSubstructMatches(benzene_query))
예제 #22
0
파일: io.py 프로젝트: bkbonde/chembl_beaker
def _molFromSmarts(smarts):
    """Compile ``smarts`` after coercing it to ``str``."""
    smarts_text = str(smarts)
    return MolFromSmarts(smarts_text)
예제 #23
0
 def __init__(self, lig_pattern, prot_pattern, distance):
     """Compile the two SMARTS patterns and store the distance cutoff.

     Parameters
     ----------
     lig_pattern : str
         SMARTS string compiled into ``self.lig_pattern``.
     prot_pattern : str
         SMARTS string compiled into ``self.prot_pattern``.
     distance : float
         Distance cutoff, stored unchanged.
     """
     compiled_lig = MolFromSmarts(lig_pattern)
     compiled_prot = MolFromSmarts(prot_pattern)
     self.lig_pattern = compiled_lig
     self.prot_pattern = compiled_prot
     self.distance = distance
예제 #24
0
def smarts_reader(smarts, **kwargs):
    """Compile ``smarts`` with ``mergeHs=True`` unless the caller overrides it.

    All other keyword arguments are forwarded to ``MolFromSmarts`` unchanged.
    """
    if 'mergeHs' not in kwargs:
        kwargs['mergeHs'] = True
    return MolFromSmarts(smarts, **kwargs)