예제 #1
0
def get_filter_values(mol):
    """
    Compute the descriptor values that the molecule filters operate on.

    Parameters
    ----------
    mol : Chem.Mol
        Molecule to describe. Any other type trips the assertion below.

    Returns
    -------
    dict
        Descriptor name -> value. Besides the computed descriptors the
        dictionary carries ``is_good`` (initialised to True) and
        ``rejections`` (initialised to an empty list).
    """

    assert isinstance(mol, Chem.Mol)

    exact_weight = desc.CalcExactMolWt(mol)
    log_p = crip.MolLogP(mol)
    acceptors = lip.NumHAcceptors(mol)
    donors = lip.NumHDonors(mol)
    polar_area = desc.CalcTPSA(mol)
    rotatable = lip.NumRotatableBonds(mol)
    # every bond that is not counted as rotatable is treated as rigid
    rigid = mol.GetNumBonds() - rotatable
    ring_count = lip.RingCount(mol)
    hetero_atoms = lip.NumHeteroatoms(mol)
    formal_charge = rdmolops.GetFormalCharge(mol)
    carbons, charges, largest_ring = get_atom_props(mol)

    try:
        hetero_carbon_ratio = float(hetero_atoms) / float(carbons)
    except ZeroDivisionError:
        # no carbons at all: fall back to a very large sentinel ratio
        hetero_carbon_ratio = 100000000

    # number of BRICS bonds, used as a measure of complexity
    brics_bonds = len(list(Brics.FindBRICSBonds(mol)))
    # distinct element symbols present in the molecule
    element_types = list({atom.GetSymbol() for atom in mol.GetAtoms()})
    chiral_centers = len(
        Chem.FindMolChiralCenters(mol, includeUnassigned=True))

    return {
        "MW": exact_weight,
        "logP": log_p,
        "HBA": acceptors,
        "HBD": donors,
        "tPSA": polar_area,
        "rot_bonds": rotatable,
        "rigid_bonds": rigid,
        "num_rings": ring_count,
        "num_hetero_atoms": hetero_atoms,
        "charge": formal_charge,
        "num_carbons": carbons,
        "num_charges": charges,
        "max_ring_size": largest_ring,
        "hc_ratio": hetero_carbon_ratio,
        "fc": brics_bonds,
        "is_good": True,  # default verdict until a filter says otherwise
        "atoms": element_types,
        "num_chiral_centers": chiral_centers,
        "rejections": [],  # reasons for rejection get appended here
    }
예제 #2
0
def getAllFeatures(SMILE, targets):
    """
    Build the feature dictionary for one compound and its targets.

    Parameters
    ----------
    SMILE : str
        SMILES string of the compound.
    targets : iterable
        Target identifiers. They are matched against the index of the
        ``lof``, ``btwn`` and ``degree`` tables in ``data['target_data']``
        and passed on to ``get_PC_value``.

    Returns
    -------
    dict
        Molecular descriptors, the per-target maxima (``lossFreq``,
        ``maxBtwn``, ``maxDegree``), the QED score (``wQED``) and the first
        three values returned by ``get_PC_value`` (``PC1``..``PC3``).

    Raises
    ------
    ValueError
        If ``SMILE`` cannot be parsed, or if none of ``targets`` occurs in
        the index of one of the target tables (``max`` of an empty sequence).
    """

    m = Chem.MolFromSmiles(SMILE)
    if m is None:
        # MolFromSmiles signals a parse failure with None; fail here with a
        # clear message instead of deep inside the first descriptor call.
        raise ValueError("Could not parse SMILES string: %r" % (SMILE,))

    MW = props.MolWt(m)  # molecular weight
    XlogP = props.MolLogP(m)  # octanol-water partition coefficient log P
    HBD = props.NumHDonors(m)  # hydrogen bond donor count
    HBA = props.NumHAcceptors(m)  # hydrogen bond acceptor count
    PSA = props.TPSA(m)  # polar surface area
    FC = rdmolops.GetFormalCharge(m)  # formal charge
    RBC = props.NumRotatableBonds(m)  # rotatable bonds count
    refr = props.MolMR(m)  # refractivity
    AROMs = props.NumAromaticRings(m)  # aromatic ring count
    QED = Chem.QED.qed(m)

    t_set = set(targets)
    target_data = data['target_data']
    lof = target_data['lof']
    btwn = target_data['btwn']
    degree = target_data['degree']

    # Fetch each matching row once, then reduce.
    lof_rows = [lof.xs(i) for i in lof.index if i in t_set]
    lossFreq = max([row[1] / row[2] for row in lof_rows])
    maxBtwn = max([btwn.xs(i)[0] for i in btwn.index if i in t_set])
    maxDegree = max([degree.xs(i)[0] for i in degree.index if i in t_set])

    pc = get_PC_value(target_data["expr"], targets)

    return {
        'MolecularWeight': MW,
        'XLogP': XlogP,
        'HydrogenBondDonorCount': HBD,
        'HydrogenBondAcceptorCount': HBA,
        'PolarSurfaceArea': PSA,
        'FormalCharge': FC,
        # NOTE(review): this key carries the *aromatic* ring count; the name
        # is kept because callers read it.
        'NumRings': AROMs,
        'RotatableBondCount': RBC,
        'Refractivity': refr,
        'lossFreq': lossFreq,
        'maxBtwn': maxBtwn,
        'maxDegree': maxDegree,
        'wQED': QED,
        'PC1': pc[0],
        'PC2': pc[1],
        'PC3': pc[2],
    }
예제 #3
0
def main(fname):
    """
    Read a molecule from a MOL file and return its formal charge.

    When ``Chem.MolFromMolFile`` yields a falsy result, a message is written
    to stderr and None is returned instead.
    """
    mol = Chem.MolFromMolFile(fname)
    if not mol:
        message = 'Molecule from file {} cannot be parsed'.format(fname)
        sys.stderr.write(message)
        return None

    return rdmolops.GetFormalCharge(mol)
예제 #4
0
def molobj_to_axyzc(molobj, atom_type=int, idx=-1):
    """
    Split an RDKit molecule object into atoms, coordinates and charge.

    ``atom_type`` is forwarded to ``molobj_to_atoms`` and ``idx`` to
    ``molobj_to_coordinates``; the charge is the molecule's formal charge.

    Returns the tuple ``(atoms, coordinates, charge)``.
    """

    return (
        molobj_to_atoms(molobj, atom_type=atom_type),
        molobj_to_coordinates(molobj, idx=idx),
        rdmolops.GetFormalCharge(molobj),
    )
예제 #5
0
    def parametrise(self,
                    params=None,
                    molecule_type="ligand",
                    id=None,
                    reparametrise=False):
        """
        Parametrise the ligand through ProtoCaller.Parametrise.

        The work is done inside ``self.workdir``. An unprotonated ligand is
        protonated first (with default settings), and the resulting files are
        stored in ``self.parametrised_files``.

        Parameters
        ----------
        params : ProtoCaller.Parametrise.Params
            Force field parameters. A default ``Params()`` is created when
            omitted.
        molecule_type : str
            The type of the molecule. One of: "ligand" and "cofactor".
        id : str
            The name of the molecule. Default: equal to the ligand name.
        reparametrise : bool
            Whether to reparametrise an already parametrised ligand.
        """
        with self.workdir:
            skip = self._parametrised and not reparametrise
            if skip:
                _logging.debug("Ligand %s is already parametrised." %
                               self.name)
                return

            _logging.info("Parametrising ligand %s..." % self.name)
            if not self.protonated:
                _logging.warning(
                    "Cannot parametrise unprotonated ligand. Protonating first with default parameters..."
                )
                self.protonate()

            force_field = _parametrise.Params() if params is None else params

            # the protonated file is converted to PDB so that antechamber
            # can read it
            pdb_file = _babel.babelTransform(self.protonated_filename, "pdb")
            ligand_id = self.name if id is None else id
            net_charge = _rdmolops.GetFormalCharge(self.molecule)

            self.parametrised_files = _parametrise.parametriseFile(
                params=force_field,
                filename=pdb_file,
                molecule_type=molecule_type,
                id=ligand_id,
                charge=net_charge)
예제 #6
0
파일: timber.py 프로젝트: chemlove/timber
def run_antechamber(mol, sdf_file, ff):
    """
    Run the antechamber / parmchk / tleap command sequence for one ligand.

    All commands are executed through the shell in the current working
    directory, and their exit statuses are not checked. The commands are
    asked to write ``UNL.mol2``, ``missing_gaff.frcmod``, ``UNL.sdf`` and
    (through the generated ``convert.leap`` script) ``UNL.off``; tleap's
    standard output is redirected to ``out``.

    Parameters
    ----------
    mol : RDKit molecule
        Only used to obtain the net formal charge passed to antechamber.
    sdf_file : str
        Path of the input SDF file.
    ff : str
        Atom type / force field name, used for ``-at`` and for the
        ``leaprc.<ff>`` file sourced by tleap.
    """
    import shlex  # local import: only this function needs it

    net_charge = int(rdmolops.GetFormalCharge(mol))

    # Quote caller-supplied values so that whitespace or shell metacharacters
    # in them cannot split or alter the command line. Plain names such as
    # "gaff2" are returned unchanged by shlex.quote.
    sdf_arg = shlex.quote(str(sdf_file))
    ff_arg = shlex.quote(str(ff))

    os.system(
        'antechamber -i %s -fi sdf -o UNL.mol2 -fo mol2 -rn UNL -nc %d -at %s -c bcc -s 0 -pf y'
        % (sdf_arg, net_charge, ff_arg))

    os.system('parmchk -i UNL.mol2 -f mol2 -o missing_gaff.frcmod -at %s' %
              ff_arg)

    # clean SDF file for rdkit
    os.system('antechamber -i UNL.mol2 -fi mol2 -o UNL.sdf -fo sdf')

    # tleap script that loads the mol2 and saves it as an OFF library
    with open('convert.leap', 'w') as f:
        f.write('source leaprc.%s\n' % (ff))
        f.write('UNL=loadmol2 UNL.mol2\n')
        f.write('saveoff UNL UNL.off\n')
        f.write('quit')

    os.system('tleap -f convert.leap>out')
예제 #7
0
def get_monoisotopic_mz_and_z(structure):
    """
    Determines the monoisotopic m/z value and charge of an ion provided as a SMILES string or .sdf file.

    The input is first tried as a SMILES string; if that does not yield a
    molecule it is treated as a path to an .sdf file and the first record of
    that file is used.

    :param structure:    str     a valid SMILES string OR a path to an .sdf file containing a single ion structure.
    :return out_dict:    dict    w/ entries "charge" (int), "monoiso_mz" (float in Daltons) and "mol" (rdkit mol obj).
    :raises TypeError:            structure is neither a parsable SMILES string nor a readable, non-empty .sdf file.
    :raises NotImplementedError:  the first record of the .sdf file did not yield a molecule.
    :raises ValueError:           the structure has a net formal charge of zero.
    """
    # parse input: SMILES first ...
    try:
        mol = Chem.MolFromSmiles(structure)
    except TypeError:
        # argument of a type the SMILES parser does not accept
        mol = None

    # ... then fall back to reading it as an .sdf file
    if mol is None:
        try:
            records = list(Chem.SDMolSupplier(structure))
            mol = records[0]
        except (OSError, IndexError) as err:
            # OSError: the file could not be opened; IndexError: no records.
            raise TypeError(
                'The provide structure was neither a valid SMILES string nor a path to an .sdf file.'
            ) from err

    # ensure mol exists
    if not mol:
        raise NotImplementedError(
            'For unknown reasons, the provided structure could not be analyzed.'
        )

    # determine properties of mol
    # NOTE(review): this is the exact mass of the whole ion and is not
    # divided by |charge| -- confirm callers handle multiply charged ions.
    monoiso_mz = rdMolDescriptors.CalcExactMolWt(mol)
    charge = rdmolops.GetFormalCharge(mol)

    # ensure provided structure is of an ion
    if not charge:
        raise ValueError(
            'Provided structures must be of ions, not neutral molecules.')

    return {'charge': int(charge), 'monoiso_mz': monoiso_mz, 'mol': mol}
예제 #8
0
def _datadump_header(log, database, banner, columns):
    """Write the provenance lines, the banner and the column legend to *log*."""
    log.log("### datadump of database %s" % database)
    log.log("### timestamp %s" % time.asctime(time.localtime(time.time())))
    log.log("### written by run_fragresp.py datadump routine.")
    log.log("###")
    for line in banner:
        log.log(line)
    log.log("###")
    log.log(columns)


def _datadump_id_field(ids):
    """Return *ids* as a comma-separated string, or "-1" when there are none."""
    ids = list(ids)
    if not ids:
        return "-1"
    return ",".join(str(item) for item in ids)


def _datadump_descriptors(mol):
    """Return the trailing descriptor columns shared by all three logs."""
    return [
        mol.GetNumAtoms(),
        mol.GetNumBonds(),
        mol.GetNumHeavyAtoms(),
        rdmolops.GetFormalCharge(mol),
        rdMolDescriptors.CalcNumHBD(mol),
        rdMolDescriptors.CalcNumHBA(mol),
        rdMolDescriptors.CalcNumRotatableBonds(mol),
        rdMolDescriptors.CalcNumRings(mol),
    ]


def _datadump_record(log, fields):
    """Log *fields* as one record; every field, including the last, is followed by a space."""
    log.log("".join(str(field) + " " for field in fields))


def _datadump_fragments(db, database, dumpdir, frag2mol, frag2lcapconn,
                        frag2rcapconn):
    """Write frag.dat and one PNG per fragment."""
    frag_log = logger(dumpdir + "/frag.dat")
    try:
        _datadump_header(
            frag_log, database,
            ("### ----------------- ###",
             "### FRAGMENT DATA LOG ###",
             "### ----------------- ###"),
            "# id smiles mol_id lcap_id rcap_id Natoms Nbonds Nnonhatoms Chg Nhbd Nhba Nrotbonds Nrings"
        )

        for frag_i in range(db.get_frag_count()):
            frag = db.get_frag(frag_i)
            Chem.SanitizeMol(frag)

            fields = [
                frag_i,
                Chem.MolToSmiles(frag, isomericSmiles=True),
                _datadump_id_field(frag2mol[frag_i]),
                _datadump_id_field(frag2lcapconn[frag_i]),
                _datadump_id_field(frag2rcapconn[frag_i]),
            ]
            fields.extend(_datadump_descriptors(frag))
            _datadump_record(frag_log, fields)

            png_path = dumpdir + "/png/" + "frag_%d.png" % frag_i
            # Drawing is best effort: a fragment that cannot be depicted is
            # reported and skipped, but Ctrl-C / SystemExit still propagate.
            try:
                Chem.SanitizeMol(frag)
                AllChem.Compute2DCoords(frag)
                Draw.MolToFile(frag, png_path, size=(500, 500))
            except Exception:
                print("Could not save frag %d to disk." % frag_i)
    finally:
        frag_log.close()


def _datadump_molecules(db, database, dumpdir, mol2frag):
    """Write mol.dat and one PNG per molecule."""
    mol_log = logger(dumpdir + "/mol.dat")
    try:
        _datadump_header(
            mol_log, database,
            ("### ----------------- ###",
             "### MOLECULE DATA LOG ###",
             "### ----------------- ###"),
            "# id name smiles frag_id Natoms Nbonds Nnonhatoms Chg Nhbd Nhba Nrotbonds Nrings"
        )

        for mol_i in range(db.get_mol_count()):
            mol = db.get_mol(mol_i)
            Chem.SanitizeMol(mol)
            name = db.get_name(mol_i)
            decomp = db.get_decompose(mol_i)

            fields = [
                mol_i,
                name,
                Chem.MolToSmiles(mol, isomericSmiles=True),
            ]
            # the number of fragment ids listed is dictated by the
            # decomposition object, not by the length of mol2frag[mol_i]
            frag_count = decomp.get_frag_count()
            fields.append(
                _datadump_id_field(
                    [mol2frag[mol_i][i] for i in range(frag_count)]))
            fields.extend(_datadump_descriptors(mol))
            _datadump_record(mol_log, fields)

            png_path = dumpdir + "/png/" + "mol_%d.png" % mol_i
            AllChem.Compute2DCoords(mol)
            Chem.Kekulize(mol)
            Draw.MolToFile(mol, png_path, size=(500, 500))
    finally:
        mol_log.close()


def _datadump_surrogates(db, database, dumpdir):
    """Write surr.dat and one PNG per non-terminal connection's surrogate cap."""
    surr_log = logger(dumpdir + "/surr.dat")
    try:
        _datadump_header(
            surr_log, database,
            ("### ----------------- ###",
             "### SURROGATE DATA LOG ###",
             "### ------------------ ###"),
            "# id name smiles mol_id Natoms Nbonds Nnonhatoms Chg Nhbd Nhba Nrotbonds Nrings"
        )

        for conn_i, conn in enumerate(db.get_conn_list()):
            # terminal connections are not written
            if conn.get_terminal():
                continue

            name = conn.get_name()
            conn_cap = conn.get_surrogate_cap()
            Chem.SanitizeMol(conn_cap)

            fields = [
                conn_i,
                name,
                Chem.MolToSmiles(conn_cap, isomericSmiles=True),
                _datadump_id_field(db.get_conn2mol()[conn_i]),
            ]
            fields.extend(_datadump_descriptors(conn_cap))
            _datadump_record(surr_log, fields)

            png_path = dumpdir + "/png/" + "surr_%s.png" % (conn_i)
            AllChem.Compute2DCoords(conn_cap)
            Chem.Kekulize(conn_cap)
            Draw.MolToFile(conn_cap, png_path, size=(500, 500))
    finally:
        surr_log.close()


def datadump(database, dumpdir):
    """
    Dump the contents of a pickled fragment database to text logs and PNGs.

    Creates ``dumpdir`` and ``dumpdir/png``, then writes ``frag.dat``,
    ``mol.dat`` and ``surr.dat`` (one record per fragment, molecule and
    non-terminal connection respectively) plus one PNG depiction for each
    record.

    Parameters
    ----------
    database : str
        Path to the pickled database file.
    dumpdir : str
        Output directory; it must not exist yet.

    Raises
    ------
    Warning
        If ``dumpdir`` already exists. Nothing is written in that case.
    """

    # NOTE(review): unpickling executes code embedded in the file -- only
    # load databases from a trusted source.
    with open(database, "rb") as handle:
        db = pickle.load(handle)

    if os.path.exists(dumpdir):
        raise Warning(
            "Caution, %s already exists. Already existing data may be overwritten."
            % (dumpdir,))
    os.mkdir(dumpdir)
    os.mkdir(dumpdir + "/png")

    frag2mol = db.get_frag2mol()
    frag2lcapconn = db.get_frag2lcapconn()
    frag2rcapconn = db.get_frag2rcapconn()
    mol2frag = db.get_mol2frag()

    _datadump_fragments(db, database, dumpdir, frag2mol, frag2lcapconn,
                        frag2rcapconn)
    _datadump_molecules(db, database, dumpdir, mol2frag)
    _datadump_surrogates(db, database, dumpdir)
예제 #9
0
def standard_qlj_typer(mol):
    """
    Assign simple, forcefield-independent nonbonded parameters to a molecule.

    The Lennard-Jones values roughly follow the Smirnoff 1.1.0 types (taken
    from timemachine/ff/params/smirnoff_1_1_0_cc.py and rounded). Hydrogens
    are typed by the element they are bonded to, all other atoms by their
    own element. Tabulated sigmas are halved before being returned.

    Every atom receives the same charge: net_formal_charge(mol) / num_atoms.

    Parameters
    ----------
    mol: RDKit.ROMol
        RDKit molecule

    Returns
    -------
    [N,3] array containing (charge, sigma, epsilon)

    """

    # (sigma, epsilon) for hydrogens, keyed by the atomic number of the
    # single atom they are bonded to; anything else uses the fallback.
    hydrogen_lj = {
        6: (0.25, 0.25),
        7: (0.10, 0.25),
        8: (0.05, 0.02),
        16: (0.10, 0.25),
    }
    hydrogen_fallback = (0.10, 0.25)

    # (sigma, epsilon) for non-hydrogen atoms, keyed by atomic number
    heavy_lj = {
        6: (0.34, 0.6),
        7: (0.32, 0.8),
        8: (0.30, 0.9),
        9: (0.3, 0.5),
        15: (0.37, 0.9),
        16: (0.35, 1.0),
        17: (0.35, 1.0),
        35: (0.39, 1.1),
        53: (0.41, 1.2),
    }

    # for charged ligands, the charge is not removed fully as that would
    # introduce large variance in the resulting estimator; it is spread
    # evenly over all atoms instead
    uniform_charge = float(rdmolops.GetFormalCharge(mol)) / mol.GetNumAtoms()

    rows = []
    for atom in mol.GetAtoms():
        a_num = atom.GetAtomicNum()
        if a_num == 1:
            bonded = atom.GetNeighbors()
            assert len(bonded) == 1
            sigma, epsilon = hydrogen_lj.get(bonded[0].GetAtomicNum(),
                                             hydrogen_fallback)
        else:
            assert a_num in heavy_lj, "Unknown a_num " + str(a_num)
            sigma, epsilon = heavy_lj[a_num]

        # sigmas need to be halved
        rows.append((uniform_charge, sigma / 2, epsilon))

    return np.array(rows)
예제 #10
0
파일: timber.py 프로젝트: callumjd/timber
                os.mkdir(dir_2_name)

                ## Write start ligand file, parameters ##
                os.chdir(dir_1_name)
                writer = SDWriter('for_parm.sdf')
                if ligands_name.count(pair[0]) > 0:
                    writer.write(ligands[ligands_name.index(pair[0])])
                    writer.flush()
                else:
                    print('Error: cannot map ligand %s.\n' % (pair[0]))
                    sys.exit()
                run_antechamber('for_parm.sdf',
                                'UNL',
                                ff,
                                int(
                                    rdmolops.GetFormalCharge(
                                        ligands[ligands_name.index(pair[0])])),
                                clean_sdf=True)

                # setup Molecule_ff
                LIG = Molecule_ff(name='LIG')
                n_atoms = len(ligands[ligands_name.index(pair[0])].GetAtoms())
                for at in ligands[ligands_name.index(pair[0])].GetAtoms():
                    x = ligands[ligands_name.index(
                        pair[0])].GetConformer().GetAtomPosition(at.GetIdx()).x
                    y = ligands[ligands_name.index(
                        pair[0])].GetConformer().GetAtomPosition(at.GetIdx()).y
                    z = ligands[ligands_name.index(
                        pair[0])].GetConformer().GetAtomPosition(at.GetIdx()).z
                    LIG.add_atom(
                        Atom_ff(idx=at.GetIdx(),
                                atomic_num=at.GetAtomicNum(),