Esempio n. 1
0
def process():
    if len(sys.argv) < 4:
	    print('Usage:', sys.argv[0], '[training-set.sdf] [logS-data] [regression coeff. output file]', file=sys.stderr)
        sys.exit(2)

	struct_is = Base.FileIOStream(sys.argv[1], 'r')
	exp_logs_is = Base.FileIOStream(sys.argv[2], 'r')
	coeff_os = Base.FileIOStream(sys.argv[3], 'w')
Esempio n. 2
0
def process():
    if len(sys.argv) < 4:
	    print('Usage:', sys.argv[0], 'training-set.sdf logP-data regression-coeff-file', file=sys.stderr)
        sys.exit(2)

	struct_is = Base.FileIOStream(sys.argv[1], 'r')
	exp_logp_is = Base.FileIOStream(sys.argv[2], 'r')
	coeff_os = Base.FileIOStream(sys.argv[3], 'w')
Esempio n. 3
0
def process():
    if len(sys.argv) < 4:
        print('Usage:',
              sys.argv[0],
              '[input.sdf] [exclude-molecules.sdf] [output.sdf]',
              file=sys.stderr)
        sys.exit(2)

    ifs = Base.FileIOStream(sys.argv[1], 'r')
    xifs = Base.FileIOStream(sys.argv[2], 'r')
    ofs = Base.FileIOStream(sys.argv[3], 'w')

    reader = Chem.SDFMoleculeReader(ifs)
    xreader = Chem.SDFMoleculeReader(xifs)
    writer = Chem.SDFMolecularGraphWriter(ofs)
    mol = Chem.BasicMolecule()

    Chem.setMultiConfImportParameter(reader, False)
    Chem.setMultiConfImportParameter(xreader, False)
    Chem.setMultiConfExportParameter(writer, False)

    stats = Stats()
    stats.read = 0
    stats.dropped = 0

    xhashes = set()

    while xreader.read(mol):
        setupMolecule(mol)

        hashcode = Chem.calcHashCode(mol)
        xhashes.add(hashcode)

    while reader.read(mol):
        #print('Processing Molecule ' + str(stats.read)
        setupMolecule(mol)

        hashcode = Chem.calcHashCode(mol)

        if hashcode in xhashes:
            stats.dropped += 1
            print('Dropped Molecule ' + str(stats.read) + ': ' +
                  Chem.generateSMILES(mol) + ' ' + Chem.getName(mol),
                  file=sys.stderr)
        else:
            writer.write(mol)

        stats.read += 1

        if stats.read % 10000 == 0:
            print('Processed ' + str(stats.read) + ' Molecules...',
                  file=sys.stderr)

    print('', file=sys.stderr)
    print('-- Summary --', file=sys.stderr)
    print('Molecules processed: ' + str(stats.read), file=sys.stderr)
    print('Molecules dropped: ' + str(stats.dropped), file=sys.stderr)
Esempio n. 4
0
        def read_in_ph(ph_path, output_dir_path):

            fr = Pharm.PMLPharmacophoreReader(Base.FileIOStream(ph_path))
            ph = Pharm.BasicPharmacophore()
            fr.read(ph)
            ph.pml_path = ph_path
            ph.dir_path = output_dir_path

            return ph
Esempio n. 5
0
def savePharmacophore(pha, path):
    '''
    Saves a particula pha at the target path.\n
    Input:\n
    pha (CDPL BasicPharmacophore): the pharmacophore to be saved as a pml file \n
    path (String): path where to save the pml file (includes the filename.pml)
    '''

    Pharm.PMLFeatureContainerWriter(Base.FileIOStream(path, 'w')).write(pha)

    return True
Esempio n. 6
0
 def write_ph_for_rpms(self, rpm_maps, output_directory):
     for fv in rpm_maps:
         directory = output_directory + '/' + str(fv)
         if not os.path.exists(directory):
             os.makedirs(directory)
         for ph_key in rpm_maps[fv]:
             ph_to_write = directory + '/ph_' + str(fv) + '_' + str(
                 ph_key) + '.pml'
             print '- Writing pharmacophore: ' + str(ph_to_write)
             Pharm.PMLFeatureContainerWriter(
                 Base.FileIOStream(ph_to_write,
                                   'w')).write(rpm_maps[ph_key])
def cdfMol_pdb(pdb, output, name):
    initial_time = time.time()
    cdf_mol = Chem.BasicMolecule()
    pdb_mol = Chem.BasicMolecule()

    pdb_str = open(pdb, 'r').read().replace('WAT', 'HOH').replace('HIE', 'HIS')
    pdb_reader = Biomol.PDBMoleculeReader(Base.StringIOStream(pdb_str))

    Biomol.setPDBApplyDictAtomBondingToNonStdResiduesParameter(
        pdb_reader, True)
    if not pdb_reader.read(pdb_mol):
        return None

    Chem.calcImplicitHydrogenCounts(pdb_mol, False)
    Chem.perceiveHybridizationStates(pdb_mol, False)
    Chem.setAtomSymbolsFromTypes(pdb_mol, False)
    Chem.perceiveSSSR(pdb_mol, False)
    Chem.setRingFlags(pdb_mol, False)
    Chem.setAromaticityFlags(pdb_mol, False)

    cdf_mol.assign(pdb_mol)
    for atom in cdf_mol.atoms:
        Chem.set3DCoordinatesArray(atom, Math.Vector3DArray())

    i = 0
    while i < cdf_mol.numAtoms:
        Chem.get3DCoordinatesArray(cdf_mol.getAtom(i)).addElement(
            Chem.get3DCoordinates(pdb_mol.getAtom(i)))
        i += 1

    tmp_output = output + name + ".cdf"
    try:
        Chem.FileCDFMolecularGraphWriter(tmp_output).write(cdf_mol)
    except:
        print('> Cdf_mol writing failure.')
        raise

    residues = Biomol.ResidueList(cdf_mol)
    tmp_output = output + name + "_residue_info.txt"
    with open(tmp_output, 'w') as txt_writer:
        txt_writer.write('residue name_resid_chain\n')
        for res in residues:
            res_id = getResidueID(res)
            txt_writer.write('{}: \n'.format(res_id))

    calc_time = time.time() - initial_time
    print('> Cdf and amino acid residue number list files generated in {}s'.
          format(int(calc_time)))
Esempio n. 8
0
def CDPLphaFromPML(pml_path):
    '''
    reads a single CDPL BasicPharmacophore from an pml-file.
    Input: \n
    pml_path (string): path to the pml file \n
    Return: \n
    (CDPL BasicPharmacophore): the corresponding CDPL BasicPharmacophore 
    '''
    pha = Pharm.BasicPharmacophore()
    ifs = Base.FileIOStream(pml_path, 'r')
    pml_reader = Pharm.PMLPharmacophoreReader(ifs)

    if not pml_reader.read(pha):
        log.error("COULD NOT READ PML", pml_path)
        return False
    return pha
def mol_to_smiles(mol,
                  kekulized=False,
                  canonical=True,
                  atom_stereo=True,
                  hydrogen_deplete=True,
                  bond_stereo=False):
    stream = Base.StringIOStream()
    w = Chem.SMILESMolecularGraphWriter(stream)
    Chem.setSMILESWriteKekuleFormParameter(w, kekulized)
    Chem.setSMILESWriteCanonicalFormParameter(w, canonical)
    Chem.setSMILESRecordFormatParameter(w, 'S')
    Chem.setSMILESWriteAtomStereoParameter(w, atom_stereo)
    Chem.setSMILESWriteBondStereoParameter(w, bond_stereo)
    Chem.setSMILESNoOrganicSubsetParameter(w, False)
    Chem.setOrdinaryHydrogenDepleteParameter(w, hydrogen_deplete)
    Chem.calcImplicitHydrogenCounts(mol, True)
    w.write(mol)
    w.close()
    return stream.value
Esempio n. 10
0
def CDPLmolFromSdf(sdf_path, conformation):
    '''
    generates a single CDPL Molecule from an sdf-file. If conformations is true, then
    one random conformation will be generated. \n
    Input: \n
    sdf_path (string): path to the sdf file \n
    conformation (boolean): generates one 3d conformation according to MMFF94 \n
    Return: \n
    (CDPL BasicMolecule): the corresponding CDPL BasicMolecule 
    '''
    mol = Chem.BasicMolecule()
    ifs = Base.FileIOStream(sdf_path, 'r')
    sdf_reader = Chem.SDFMoleculeReader(ifs)

    if not sdf_reader.read(mol):
        log.error("COULD NOT READ SDF", sdf_path)
        return False
    if conformation:
        return _CDPLgenerateConformation(mol)
    return mol
Esempio n. 11
0
def _CDPLreadFromPDBFile(pdb_file):
    '''
    PRIVAT METHOD
    reads a pdb file and is used by the CDPLreadProteinFile method.
    Input: \n
    pdb_file (string): the path to the pdb file  \n
    Return: \n
    (CDPL BasicMolecule): the corresponding pdb molecule
     '''
    ifs = Base.FileIOStream(pdb_file, 'r')
    pdb_reader = Biomol.PDBMoleculeReader(ifs)
    pdb_mol = Chem.BasicMolecule()

    Biomol.setPDBApplyDictAtomBondingToNonStdResiduesParameter(
        pdb_reader, False
    )  #TODO Should this be there for the pdb readin? or also in the config?

    if not pdb_reader.read(pdb_mol):
        log.error("COULD NOT READ PDB", pdb_file)
        return False

    return pdb_mol
Esempio n. 12
0
        def process(sdf_file, psd_file_path):

            ifs = Base.FileIOStream(sdf_file, 'r')

            reader = Chem.SDFMoleculeReader(ifs)

            mol = Chem.BasicMolecule()

            Chem.setMultiConfImportParameter(reader, True)

            psd_creator = Pharm.PSDScreeningDBCreator(
                psd_file_path, Pharm.PSDScreeningDBCreator.CREATE, True)
            i = 0
            t0 = time.clock()

            while reader.read(mol):
                setupMolecule(mol)

                psd_creator.process(mol)
                i += 1

                if i % 100 == 0:
                    print 'Processed ' + str(i) + ' molecules (' + str(
                        time.clock() - t0), 's elapsed)...'
                    t0 = time.clock()

                mol.clear()

            print ''
            print '-- Summary --'
            print 'Molecules processed: ' + str(psd_creator.numProcessed)
            print 'Molecules rejected: ' + str(psd_creator.numRejected)
            print 'Molecules deleted: ' + str(psd_creator.numDeleted)
            print 'Molecules inserted: ' + str(psd_creator.numInserted)

            psd_creator.close()
Esempio n. 13
0
def readPDBFromFile(path: str) -> Chem.BasicMolecule:
    s = Base.FileIOStream(path)
    protein = readPDBFromStream(s)
    return protein
Esempio n. 14
0
        def generate_ph(pdb, key):

            ifs = Base.FileIOStream(pdb, 'r')
            tlc = self.ligand_3_letter_code
            pdb_reader = Biomol.PDBMoleculeReader(ifs)
            pdb_mol = Chem.BasicMolecule()

            print '- Reading input: ', pdb, ' ...'

            if not pdb_reader.read(pdb_mol):
                print '!! Could not read input molecule'
                return

            print '- Processing macromolecule', pdb, ' ...'

            i = 0

            while i < pdb_mol.getNumBonds():
                bond = pdb_mol.getBond(i)

                if Chem.isMetal(bond.atoms[0]) or Chem.isMetal(bond.atoms[1]):
                    pdb_mol.removeBond(i)
                else:
                    i += 1

            Chem.calcImplicitHydrogenCounts(pdb_mol, True)
            Chem.perceiveHybridizationStates(pdb_mol, True)
            Chem.makeHydrogenComplete(pdb_mol)
            Chem.setAtomSymbolsFromTypes(pdb_mol, False)
            Chem.calcImplicitHydrogenCounts(pdb_mol, True)
            Biomol.setHydrogenResidueSequenceInfo(pdb_mol, False)
            Chem.setRingFlags(pdb_mol, True)
            Chem.setAromaticityFlags(pdb_mol, True)
            Chem.generateHydrogen3DCoordinates(pdb_mol, True)
            ligand = Chem.Fragment()

            print '- Extracting ligand ', tlc, ' ...'

            for atom in pdb_mol.atoms:
                if Biomol.getResidueCode(atom) == tlc:
                    Biomol.extractResidueSubstructure(atom, pdb_mol, ligand,
                                                      False)
                    break

            if ligand.numAtoms == 0:
                print '!! Could not find ligand', tlc, 'in input file'
                return

            Chem.perceiveSSSR(ligand, True)

            lig_env = Chem.Fragment()

            Biomol.extractEnvironmentResidues(ligand, pdb_mol, lig_env, 7.0)
            Chem.perceiveSSSR(lig_env, True)
            print '- Constructing pharmacophore ...'
            lig_pharm = Pharm.BasicPharmacophore()
            env_pharm = Pharm.BasicPharmacophore()
            pharm_gen = Pharm.DefaultPharmacophoreGenerator(False)
            pharm_gen.generate(ligand, lig_pharm)
            pharm_gen.generate(lig_env, env_pharm)
            analyzer = Pharm.DefaultInteractionAnalyzer()
            interactions = Pharm.FeatureMapping()
            analyzer.analyze(lig_pharm, env_pharm, interactions)

            #------------------------- XVOLS

            int_env_ftrs = Pharm.FeatureSet()
            Pharm.getFeatures(int_env_ftrs, interactions, False)
            int_core_ftrs = Pharm.FeatureSet()
            Pharm.getFeatures(int_core_ftrs, interactions, True)
            int_pharm = Pharm.BasicPharmacophore(int_core_ftrs)

            for ftr in int_env_ftrs:
                if Pharm.getType(
                        ftr
                ) == Pharm.FeatureType.H_BOND_DONOR or Pharm.getType(
                        ftr) == Pharm.FeatureType.H_BOND_ACCEPTOR:
                    Pharm.setTolerance(ftr, 1.0)
                else:
                    Pharm.setTolerance(ftr, 1.5)

            Pharm.createExclusionVolumes(int_pharm, int_env_ftrs, 0.0, 0.1,
                                         False)
            int_env_ftr_atoms = Chem.Fragment()
            Pharm.getFeatureAtoms(int_env_ftrs, int_env_ftr_atoms)
            int_residue_atoms = Chem.Fragment()
            Biomol.extractResidueSubstructures(int_env_ftr_atoms, lig_env,
                                               int_residue_atoms, True)
            Chem.makeHydrogenDeplete(int_residue_atoms)

            def isAlphaAtom(atom):
                return Biomol.getResidueAtomName(atom) == 'CA'

            Chem.removeAtomsIfNot(int_residue_atoms, isAlphaAtom)
            Pharm.createExclusionVolumes(int_pharm, int_residue_atoms,
                                         Chem.Atom3DCoordinatesFunctor(), 1.0,
                                         2.0, False)

            features_in_ph = []
            for int_ftr in int_pharm:
                if Pharm.hasSubstructure(int_ftr) == False:
                    continue
                elif ftype_names[Pharm.getType(int_ftr)] == 'XV':
                    continue
                feature_id = generate_key(int_ftr)
                features_in_ph.append(str(feature_id))
                self.unique_feature_vector.add(str(feature_id))

            int_pharm.fv = features_in_ph
            int_pharm.path_to_pdb = pdb

            return int_pharm
def mol_from_smiles(smiles: str) -> Chem.BasicMolecule:
    ifs = Base.StringIOStream(smiles)
    mol = Chem.BasicMolecule()
    r = Chem.SMILESMoleculeReader(ifs)
    r.read(mol)
    return sanitize_mol(mol)
Esempio n. 16
0
def cleanStructures():
    if len(sys.argv) < 5:
        print('Usage:',
              sys.argv[0],
              '[input.sdf] [output.sdf] [dropped.sdf] [start_index] [[count]]',
              file=sys.stderr)
        sys.exit(2)

    ifs = Base.FileIOStream(sys.argv[1], 'r')
    ofs = Base.FileIOStream(sys.argv[2], 'w')
    dofs = Base.FileIOStream(sys.argv[3], 'w')
    offset = int(sys.argv[4])
    count = 0

    if len(sys.argv) > 5:
        count = int(sys.argv[5])

    reader = Chem.SDFMoleculeReader(ifs)
    writer = Chem.SDFMolecularGraphWriter(ofs)
    dwriter = Chem.SDFMolecularGraphWriter(dofs)
    mol = Chem.BasicMolecule()

    #Chem.setSMILESRecordFormatParameter(reader, 'SN')

    stats = Stats()
    stats.read = 0
    stats.dropped = 0
    stats.modified = 0

    Chem.setMultiConfImportParameter(reader, False)
    Chem.setMultiConfExportParameter(writer, False)
    Chem.setMultiConfExportParameter(dwriter, False)

    if offset > 0:
        print('Skipping Molecules to Start Index ' + str(offset),
              file=sys.stderr)
        reader.setRecordIndex(offset)
        #print('Finished Setting Record Index', file=sys.stderr)

    stats.read = offset

    while reader.read(mol):
        #print('Processing Molecule ' + str(stats.read)
        proc_mol = processMolecule(mol, stats)

        if proc_mol:
            writer.write(proc_mol)
        else:
            stats.dropped += 1
            dwriter.write(mol)
            print('Dropped Molecule ' + str(stats.read) + ': ' +
                  generateSMILES(mol) + ' ' + Chem.getName(mol),
                  file=sys.stderr)

        stats.read += 1

        if stats.read % 10000 == 0:
            print('Processed ' + str(stats.read - offset) + ' Molecules...',
                  file=sys.stderr)

        if count > 0 and (stats.read - offset) >= count:
            break

    print('', file=sys.stderr)
    print('-- Summary --', file=sys.stderr)
    print('Molecules processed: ' + str(stats.read - offset), file=sys.stderr)
    print('Molecules dropped: ' + str(stats.dropped), file=sys.stderr)
    print('Molecules modified: ' + str(stats.modified), file=sys.stderr)
def readPDBFromString(string: str) -> Chem.BasicMolecule:
    s = Base.StringIOStream(string)
    protein = readPDBFromStream(s)
    return protein
def writePDB(path: str, protein: Chem.BasicMolecule) -> None:
    s = Base.FileIOStream(path)
    w = Biomol.PDBMolecularGraphWriter(s)
    w.write(protein)
    w.close()
def process():
    if len(sys.argv) < 4:
        print(
            'Usage:',
            sys.argv[0],
            '[input torsion rules.xml] [structures.sdf] [output torsion histogram library.sdf]',
            file=sys.stderr)
        sys.exit(2)

    tor_lib = ConfGen.TorsionLibrary()

    try:
        tor_lib.load(Base.FileIOStream(sys.argv[1], 'r'))
    except:
        print('Error while loading input torsion rules:',
              sys.exc_info()[0],
              file=sys.stderr)
        sys.exit(2)

    tor_matcher = ConfGen.TorsionRuleMatcher(tor_lib)

    tor_matcher.findAllRuleMappings(True)
    tor_matcher.findUniqueMappingsOnly(True)
    tor_matcher.stopAtFirstMatchingRule(True)

    mol = Chem.BasicMolecule()
    mol_reader = Chem.FileSDFMoleculeReader(sys.argv[2])

    Chem.setMultiConfImportParameter(mol_reader, False)

    print('- Analyzing input structures...', file=sys.stderr)

    i = 1
    rule_to_angle_hists = {}
    coords = Math.Vector3DArray()

    while True:
        try:
            if not mol_reader.read(mol):
                break

        except IOError as e:
            print('Error while reading input molecule',
                  i,
                  ':',
                  e,
                  file=sys.stderr)
            i += 1
            continue

        if i % 500 == 0:
            print('   ... At input molecule', i, '...', file=sys.stderr)

        Chem.initSubstructureSearchTarget(mol, False)

        try:
            Chem.get3DCoordinates(mol, coords)

        except Base.ItemNotFound:
            print('Could not get 3D-coordinates for molecule',
                  i,
                  file=sys.stderr)
            i += 1
            continue

        for bond in mol.bonds:
            if Chem.getRingFlag(bond):
                continue

            if Chem.isHydrogenBond(bond):
                continue

            if Chem.getExplicitBondCount(
                    bond.getBegin()) <= 1 or Chem.getExplicitBondCount(
                        bond.getEnd()) <= 1:
                continue

            tor_matcher.findMatches(bond, mol, False)

            for match in tor_matcher:
                processMatch(i, match, mol, coords, rule_to_angle_hists)

        i += 1

    print('- Processing torsion angle histograms...', file=sys.stderr)

    processHistograms(tor_lib, rule_to_angle_hists)

    print('- Writing output torsion library...', file=sys.stderr)

    try:
        tor_lib.save(Base.FileIOStream(sys.argv[3], 'w+'))

    except:
        print('Error while writing torsion library:',
              sys.exc_info()[0],
              file=sys.stderr)
        sys.exit(2)

    print('DONE!', file=sys.stderr)
Esempio n. 20
0
def generate_ph(pdb, args, df_constructor, ts):

    ifs = Base.FileIOStream(pdb, 'r')
    tlc = args.ligand_three_letter_code
    pdb_reader = Biomol.PDBMoleculeReader(ifs)
    pdb_mol = Chem.BasicMolecule()

    print '- Reading input: ', pdb, ' ...'

    if not pdb_reader.read(pdb_mol):
        print '!! Could not read input molecule'
        return

    print '- Processing macromolecule', pdb, ' ...'

    i = 0

    while i < pdb_mol.getNumBonds():
        bond = pdb_mol.getBond(i)

        if Chem.isMetal(bond.atoms[0]) or Chem.isMetal(bond.atoms[1]):
            pdb_mol.removeBond(i)
        else:
            i += 1

    for a in pdb_mol.atoms:
        Chem.setImplicitHydrogenCount(a, 0)

    Chem.calcImplicitHydrogenCounts(pdb_mol, True)
    Chem.perceiveHybridizationStates(pdb_mol, True)
    Chem.makeHydrogenComplete(pdb_mol)
    Chem.setAtomSymbolsFromTypes(pdb_mol, False)
    Chem.calcImplicitHydrogenCounts(pdb_mol, True)
    Biomol.setHydrogenResidueSequenceInfo(pdb_mol, False)
    Chem.setRingFlags(pdb_mol, True)
    Chem.setAromaticityFlags(pdb_mol, True)
    Chem.generateHydrogen3DCoordinates(pdb_mol, True)
    Chem.calcFormalCharges(pdb_mol, True)
    ligand = Chem.Fragment()

    print '- Extracting ligand ', tlc, ' ...'

    for atom in pdb_mol.atoms:
        if Biomol.getResidueCode(atom) == tlc:
            Biomol.extractResidueSubstructure(atom, pdb_mol, ligand, False)
            break

    if ligand.numAtoms == 0:
        print '!! Could not find ligand', tlc, 'in input file'
        return

    Chem.perceiveSSSR(ligand, True)

    lig_env = Chem.Fragment()

    Biomol.extractEnvironmentResidues(ligand, pdb_mol, lig_env, 7.0)
    Chem.perceiveSSSR(lig_env, True)
    print '- Constructing pharmacophore ...'
    lig_pharm = Pharm.BasicPharmacophore()
    env_pharm = Pharm.BasicPharmacophore()
    pharm_gen = Pharm.DefaultPharmacophoreGenerator(True)
    pharm_gen.generate(ligand, lig_pharm)
    pharm_gen.generate(lig_env, env_pharm)
    #Pharm.FilePMLFeatureContainerWriter('./test/lig_ph_' + str(ts) + '.pml').write(lig_pharm)

    analyzer = Pharm.DefaultInteractionAnalyzer()
    interactions = Pharm.FeatureMapping()
    analyzer.analyze(lig_pharm, env_pharm, interactions)
    df_constructor, interaction_at_ts = outputInteractions(
        lig_pharm, env_pharm, interactions, df_constructor)
    #Chem.FileSDFMolecularGraphWriter('./test/ligand_' + str(ts) + '.sdf').write(ligand)

    return df_constructor, interaction_at_ts