Exemple #1
0
def mp_helper(file_one, file_two, iteration_depth = 1):
    """
        Worker entry point for the multiprocessing library.

        Every molecule of ``file_two`` is merged against every molecule of
        ``file_one``.  The collected results and the collected fragments are
        each written to their own temporary file inside ``temp_dir``.

        Returns a ``(molecule_file, fragment_file)`` tuple of file names; an
        entry is ``None`` when nothing of that kind was produced.
    """
    def _dump(lines):
        # Persist the collected lines.  The file is kept on disk
        # (delete=False) so it can be picked up by name afterwards.
        if not lines:
            return None
        handle = tempfile.NamedTemporaryFile(dir=temp_dir, delete=False)
        handle.writelines(lines)
        handle.close()
        return handle.name

    merged, leftovers = [], []
    for first in pybel.readfile('smi', file_one):
        # file_two is read again for every molecule of file_one.
        for second in pybel.readfile('smi', file_two):
            result, fragment = merge(second, first, options, iteration_depth)
            if result:
                merged.extend(result)
            if fragment:
                leftovers.extend(fragment)

    # The fragment file is created before the result file.
    fragment_return = _dump(leftovers)
    molecule_return = _dump(merged)
    return molecule_return, fragment_return
def do_for_each(name, num_threads):
    """Process one ligand file with ``num_threads`` AtomPairWalker threads.

    The ligand is read from ``name`` (format taken from the text after the
    last dot), the protein from a PDB file whose id is the last
    ``_``-separated token of the base name, and the output file is
    ``<basename>.num``.  ``ligand``, ``protein``, ``outf`` and ``ap_iter``
    are published as module globals before the threads are started.
    """
    global ligand, protein, outf, ap_iter

    dot = name.rfind(".")
    _format = name[dot + 1:]
    basename = name[:dot]
    pdb_id = basename.split("_")[-1]
    pdb_name = "/home/xmluo/jlpeng/cMet/pdb_protein/%s_protein.pdb" % pdb_id

    # Only the first molecule of each file is used.
    ligand = next(pybel.readfile(_format, name))
    protein = next(pybel.readfile("pdb", pdb_name))
    outf = open(basename + ".num", "w")
    ap_iter = AtomPairs()

    workers = [AtomPairWalker() for _ in xrange(num_threads)]
    print("len(threads)=%d" % len(workers))
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
        if worker.isAlive():
            print("thread %s is alive" % worker.getName())
        else:
            print("thread %s is dead" % worker.getName())
    outf.close()
Exemple #3
0
def dock_ligands_to_receptors(docking_dir, worker_pool=False, exhaustiveness=None, chosen_receptor=None, restrict_box=True):
  """Dock every prepared ligand found under ``docking_dir`` to its receptor.

  Each sub-directory of ``docking_dir`` is treated as one receptor named
  after the directory.  A sub-directory is skipped unless it contains
  ``<name>.pdbqt``; ``<name>.pdb`` is read to size the search box and every
  ``*_prepared.pdbqt`` file in the directory is docked.

  Args:
    docking_dir: directory holding one sub-directory per receptor.
    worker_pool: ``False`` to dock serially, otherwise an object whose
      ``map_sync(func, ligands)`` method runs the docking jobs.
    exhaustiveness: forwarded unchanged to ``dock_ligand_to_receptor``.
    chosen_receptor: when not ``None``, only this receptor is processed.
    restrict_box: when true, the first ligand whose path contains "CHEM" is
      docked first; all ligands are then docked in a 20 x 20 x 20 box
      centred on the centroid of that docked pose.
  """
  for subdir in glob.glob(os.path.join(docking_dir, '*/')):
    subdir = subdir.rstrip('/')
    receptor_name = os.path.basename(subdir)
    if chosen_receptor is not None and chosen_receptor != receptor_name:
      continue
    print("receptor name = %s" % receptor_name)
    receptor_filename = os.path.join(subdir, "%s.pdbqt" % receptor_name)
    if not os.path.exists(receptor_filename):
      continue

    print("Examining %s" % receptor_filename)

    receptor_pybel = next(pybel.readfile("pdb",
        os.path.join(subdir, "%s.pdb" % receptor_name)))
    protein_centroid, protein_range = get_molecule_data(receptor_pybel)

    # Default search box: the protein range plus a margin of 5.0.
    box_dims = protein_range + 5.0

    ligands = sorted(glob.glob(os.path.join(subdir, '*_prepared.pdbqt')))
    print("Num ligands = %d" % len(ligands))

    dock_ligand_to_receptor_partial = partial(dock_ligand_to_receptor, receptor_filename=receptor_filename,
                                              protein_centroid=protein_centroid, box_dims=box_dims,
                                              subdir=subdir, exhaustiveness=exhaustiveness)

    if restrict_box:
      # NOTE(review): when no ligand path contains "CHEM" the empty string is
      # docked below, exactly as before -- confirm whether that case should
      # fall back to the whole-protein box instead.
      active_ligand = ""
      for candidate in ligands:
        if "CHEM" in candidate:
          active_ligand = candidate
          break

      print("Docking to %s first to ascertain centroid and box dimensions" % active_ligand)

      out_pdb_qt = dock_ligand_to_receptor_partial(active_ligand)
      ligand_pybel = next(pybel.readfile("pdbqt", out_pdb_qt))
      ligand_centroid, _ = get_molecule_data(ligand_pybel)
      print("Protein centroid = %s" %(str(protein_centroid)))
      print("Ligand centroid = %s" %(str(ligand_centroid)))
      # Re-centre on the docked pose and shrink the box.
      box_dims = np.array([20., 20., 20.])
      dock_ligand_to_receptor_partial = partial(dock_ligand_to_receptor, receptor_filename=receptor_filename,
                                                protein_centroid=ligand_centroid, box_dims=box_dims,
                                                subdir=subdir, exhaustiveness=exhaustiveness)

      print("Finished docking to %s, docking to remainder of ligands now." % active_ligand)

    if worker_pool is False:
      for ligand_file in ligands:
        start = time.time()
        # Dock the ligand of this iteration.  The previous code passed the
        # stale ``ligand`` variable left over from the "CHEM" search, which
        # re-docked one ligand repeatedly (or raised NameError when
        # restrict_box was false).
        dock_ligand_to_receptor_partial(ligand_file)
        print("took %f seconds to dock single ligand." %(time.time() - start))
    else:
      print("parallelizing docking over worker pool")

      worker_pool.map_sync(dock_ligand_to_receptor_partial, ligands)
Exemple #4
0
    def helper_run(self, result):
        """Return the RMSD between a result ligand and the native ligand.

        ``result`` must provide ``output().path`` naming a PDBQT file; the
        native ligand is read from ``LigPdbqt(self.tname).output().path``.
        Only the first molecule of each file is used.
        """
        # Built-in next() replaces the Python-2-only ``.next()`` method.
        result_lig = next(pybel.readfile("pdbqt", result.output().path))
        native_path = LigPdbqt(self.tname).output().path
        native_lig = next(pybel.readfile("pdbqt", native_path))
        return rmsd_between(native_lig, result_lig)
Exemple #5
0
def create_inchi(job, chemid, xyz_file=''):
    """Return the InChI string (without stereo layer) of an XYZ geometry.

    Args:
        job: job directory prefix; used only when ``xyz_file`` is empty, in
            which case the geometry is read from
            ``expanduser(job) + 'xyz/' + chemid + '.xyz'``.
        chemid: identifier used to build the default file name.
        xyz_file: explicit path of the XYZ file to read.

    Exits the process (after logging an error) when ``pybel`` is not
    available in this module.
    """
    if xyz_file == '':
        xyz_file = os.path.expanduser(job) + 'xyz/' + chemid + '.xyz'
    # The read happens only inside the try: an earlier duplicate outside of
    # it raised NameError before the handler below could ever run, and
    # parsed the file twice.
    try:
        obmol = list(pybel.readfile('xyz', xyz_file))[0]
    except NameError:
        logging.error('Pybel is not installed or loaded correctly.')
        sys.exit()

    return obmol.write("inchi", opt={'T': 'nostereo'}).split()[0]
Exemple #6
0
def prepare_dsstox_dataset(root, name, dest=None, overwrite=False):
    """ Bootstrap the analysis of a DSSTox dataset.

       Steps performed:
       - Read ``<name>_training.sdf`` and ``<name>_prediction.sdf`` from ``root``
       - Rename the compounds so their train/test provenance is kept
       - Generate 3D conformations
       - Save the merged compounds plus the "master" and "saliviewer" tables
         under ``<dest>/<name>``

       Nothing is done when the merged SDF already exists and ``overwrite``
       is false.  ``dest`` defaults to ``root``.
    """
    dest = dest or root

    dataset_root = op.join(dest, name)
    dest_sdf = op.join(dataset_root, name + '.sdf')
    if not overwrite and op.exists(dest_sdf):
        print('%s is already there and not overwriting requested' % dest_sdf)
        return

    print('Reading %s' % name)
    train_mols = list(pybel.readfile('sdf', op.join(root, name + '_training.sdf')))
    test_mols = list(pybel.readfile('sdf', op.join(root, name + '_prediction.sdf')))

    print('\tCreating dataset root: %s' % dataset_root)
    if not op.exists(dataset_root):
        os.makedirs(dataset_root)

    print('\tRenaming the compounds to keep track of the provenance')
    rename_mols_by_index(train_mols, name + '-train-')
    rename_mols_by_index(test_mols, name + '-test-')

    print('\tGenerating conformations')
    # Some molecules from mutagenicity produce a segfault on make3D, see
    # https://sourceforge.net/tracker/?func=detail&aid=3374324&group_id=40728&atid=428740
    #   Train 3988: OC(=O)[C@]1(C)CCC[C@]2(C1CC[C@]13C2CC[C@](C3)([C@]2(C1)OC2)O)C
    #   Train 4205: CC(CCC[C@H]([C@H]1CC[C@@H]2[C@]1(C)CC[C@H]1[C@H]2CC2([C@@H]3[C@]1(C)CC[C@@H](C3)Br)S(=O)(=O)CCS2(=O)=O)C)C
    # Such fatal errors cannot be caught from Python, so those two molecules
    # are skipped by title.
    segfaulting_titles = ('train-3988', 'train-4205')
    all_mols = train_mols + test_mols
    for mol in all_mols:
        if any(tag in mol.title for tag in segfaulting_titles):
            continue
        try:
            print('Conformation for %s' % mol.title)
            mol.make3D()
        except Exception:
            print('Error computing a 3D conformation for %s' % mol.title)

    print('\tSaving compounds')
    save_mols(train_mols + test_mols, dest_sdf)

    master_table = op.join(dataset_root, name + '-master.csv')
    print('\tCreating \"master\" table: %s' % master_table)
    create_master_table(dest_sdf, master_table)

    sali_table = op.join(dataset_root, name + '-saliviewer.csv')
    print('\tCreating \"saliviewer\" table: %s' % sali_table)
    create_saliviewer_input(master_table, sali_table)
    def pred_foo(tname):
        """Return the RMSD between the predicted and the native ligand.

        The prediction is read from
        ``GEAUX_OUTPUT/<tname>/<tname>_pred_0.sdf`` and the native ligand from
        ``VinaPath(tname).lig_sdf``; only the first molecule of each file is
        used.  Any failure is printed and ``None`` is returned instead.
        """
        try:
            geaux_sdf = os.path.join(GEAUX_OUTPUT, tname,
                                     tname + '_pred_0.sdf')
            native_sdf = VinaPath(tname).lig_sdf

            # Built-in next() instead of ``.next()``: on Python 3 the missing
            # method raised AttributeError, which the handler below swallowed,
            # so the function silently returned None for every input.
            geaux_lig = next(pybel.readfile("sdf", geaux_sdf))
            native_lig = next(pybel.readfile("sdf", native_sdf))
            return rmsd_between(native_lig, geaux_lig)
        except Exception as e:
            print(e)
            return None
Exemple #8
0
 def __init__(self, filename, ext):
     """Load the first molecule of ``filename`` and set up bookkeeping.

     Args:
         filename: path of the structure file to read.
         ext: format code passed to ``pybel.readfile``; also handed to
             ``clean`` together with the file's base name.
     """
     self.data = {'name':'','index':'', 'metal':'', 'topology':'', 'parent':'',
             'atomic_info':'', 'bond_table':'', 'connectivity':'',
             'connect_flag':'', 'connect_sym':''}
     name = os.path.split(filename)[-1]
     self.name = clean(name, ext)
     self.update(name=self.name)
     # The built-in next() works on Python 2.6+ and on Python 3, so no
     # interpreter-version branch is needed to fetch the first molecule.
     self.mol = next(pybel.readfile(ext, filename))
     self._reset_formal_charges()
    def caculateRMSD(self):
        """Return the heavy-atom RMSD between the predicted and crystal ligand.

        The predicted pose is the first molecule of the required task's
        output PDBQT file; the crystal pose is the first molecule of that
        task's ``lig_pdbqt`` file.  The coordinate lists are handed to
        ``dockedpose.rmsd`` in file order.
        """
        vina_task = self.requires()
        # Built-in next() replaces the Python-2-only ``.next()`` method.
        predicted_mol = next(pybel.readfile('pdbqt', vina_task.output().path))
        crystal_mol = next(pybel.readfile('pdbqt', vina_task.lig_pdbqt))

        def heavy_coords(mol):
            # Hydrogens are left out of the comparison.
            return [a.coords for a in mol if not a.OBAtom.IsHydrogen()]

        return dockedpose.rmsd(heavy_coords(predicted_mol),
                               heavy_coords(crystal_mol))
 def run(self):
     """Write a CSV with the ligand and protein atom counts of every SDF id.

     For each id returned by ``self.getSdfs()`` the first molecule of the
     ligand SDF and of the protein PDB is read, hydrogens are removed, and
     the remaining atom counts are stored as ``lig_sz`` and ``prt_sz``.
     The resulting table has one column per SDF id.
     """
     data = {}
     for sdf_id in self.getSdfs():
         path = Path(sdf_id)
         # Built-in next() replaces the Python-2-only ``.next()`` method.
         lig = next(pybel.readfile('sdf', path.astex_sdf()))
         lig.removeh()
         prt = next(pybel.readfile('pdb', path.astex_pdb()))
         prt.removeh()
         data[sdf_id] = {"lig_sz": len(lig.atoms),
                         "prt_sz": len(prt.atoms)}
     dset = pd.DataFrame(data)
     dset.to_csv(self.output().path)
def main():
    """Print the two scores returned by ``GetFinger`` for every complex.

    Each entry of ``GetFileList()`` supplies the ligand SDF path, the
    protein PDB path and the PDB id (positions 0, 1 and 2).  Entries whose
    ligand file does not exist are skipped.
    """
    file_list = GetFileList()
    dc_fn = 'dist_splited_train_0525.dat'
    pts = dist.PTS(dist.UnformatedInput(dc_fn))
    for entry in file_list:
        lig_fn = entry[0]
        pro_fn = entry[1]
        pdbid = entry[2]
        if not os.path.isfile(lig_fn):
            continue
        # Only the first molecule of each file is used.
        lig = next(pybel.readfile('sdf', lig_fn))
        pro = next(pybel.readfile('pdb', pro_fn))
        score, scorev = GetFinger(lig, pro, pts, pdbid)
        print("%s %s" % (score, scorev))
 def helper_geauxdock(tname, version="0.7"):
     """Return the RMSD between the GeauxDock result and the native ligand.

     The result is read from ``<work_dir>/<tname>_<version>_0.sdf`` where
     ``work_dir`` comes from ``ModelPath(tname, version=version)``; the
     native ligand comes from ``VinaPath(tname).lig_sdf``.  Only the first
     molecule of each file is used.  Any failure is printed and ``None`` is
     returned instead.
     """
     try:
         native_sdf = VinaPath(tname).lig_sdf
         work_dir = ModelPath(tname, version=version).work_dir
         geaux_sdf = os.path.join(work_dir,
                                  "{}_{}_0.sdf".format(tname, version))
         # Built-in next() instead of ``.next()``: on Python 3 the missing
         # method raised AttributeError, which the handler below swallowed.
         geaux_lig = next(pybel.readfile("sdf", geaux_sdf))
         native_lig = next(pybel.readfile("sdf", native_sdf))
         return rmsd_between(native_lig, geaux_lig)
     except Exception as e:
         print(e)
         return None
Exemple #13
0
    def run(self):
        """Write per-residue ligand distances and ligand atom types as JSON.

        The ligand (first molecule of ``getPath().sdf``, hydrogens removed)
        and the protein structure (``getPath().pdb``) are loaded; for every
        residue a record with its distances to the ligand atoms, the ligand
        atom types (translated from the 'INT' to the 'SYB' type set) and the
        residue name is written to ``self.output()``.
        """
        mypath = self.getPath()
        lig_ifn = mypath.sdf
        prt_ifn = mypath.pdb

        lig_ext = os.path.basename(lig_ifn).split('.')[-1]
        # Built-in next() replaces the Python-2-only ``.next()`` method.
        lig = next(pybel.readfile(lig_ext, lig_ifn))
        lig.removeh()
        structure = PDBParser(QUIET=True).get_structure('prt', prt_ifn)

        typetable = OBTypeTable()
        typetable.SetFromType('INT')
        typetable.SetToType('SYB')

        atom_types = [typetable.Translate(atom.type) for atom in lig.atoms]
        # NOTE(review): if ``shuffle`` is ``random.shuffle`` it works in place
        # and returns None, so every record would carry ``null`` here; this
        # only does what it appears to if ``shuffle`` returns a shuffled
        # sequence -- confirm which one is imported.
        atom_types = shuffle(atom_types)

        # The same atom_types value is attached to every residue record.
        dat = []
        for residue in structure.get_residues():
            dists = residueDistances2LigandAtoms(residue, lig)
            dat.append({"dists": dists,
                        "atom_types": atom_types,
                        "residue": residue.get_resname()})

        to_write = json.dumps(dat, indent=4, separators=(',', ':'))
        with self.output().open('w') as ofs:
            ofs.write(to_write)
Exemple #14
0
def parse_molfile(file_name):
    """Return the isotope composition of the first molecule in a MOL file.

    Hydrogens are added before counting.  The result is a list with one
    entry per distinct (symbol, atomic number, isotope) combination, in
    order of first appearance:

        [0] isotope symbol as returned by ``el.get_isotope_info``
        [1] number of atoms of that kind
        [2] atomic number
        [3] isotope number; 0 means natural abundance

    Example of the returned rows:
        ['12C', 9, 6, 0]
        ['15N', 1, 7, 15]
        ['16O', 2, 8, 0]
        ['35Cl', 1, 17, 0]
        ['1H', 10, 1, 0]
    """
    # Built-in next() replaces the Python-2-only ``.next()`` method.
    mol = next(pybel.readfile('mol', file_name))
    mol.addh()

    # Count in a single pass; ``order`` remembers first-seen order so the
    # output matches the previous list-scan implementation without its
    # quadratic ``list.count`` calls.
    counts = {}
    order = []
    for atom in mol.atoms:
        symbel = el.get_isotope_info(atom.atomicnum, atom.isotope, 'isotope_symbol')
        key = (symbel, atom.atomicnum, atom.isotope)
        if key not in counts:
            counts[key] = 0
            order.append(key)
        counts[key] += 1

    # Duplicates are collapsed into one row per kind.
    return [[key[0], counts[key], key[1], key[2]] for key in order]
Exemple #15
0
def main(input_ext, inputfile, output_ext, outputfilename,
         nconfs, rmsd_cutoff, energy_cutoff):
    """Generate diverse conformers for every input molecule and write them out.

    Args:
        input_ext: pybel format code of ``inputfile``.
        inputfile: path of the molecules to process.
        output_ext: pybel format code of the output file.
        outputfilename: output path; an existing file is overwritten.
        nconfs, rmsd_cutoff, energy_cutoff: passed to the MMFF94 force
            field's ``DiverseConfGen`` as
            ``DiverseConfGen(rmsd_cutoff, nconfs, energy_cutoff)``.

    Progress and timing information is printed for every molecule.
    """
    ff = pybel._forcefields['mmff94']
    outputfile = pybel.Outputfile(output_ext, outputfilename, overwrite=True)
    try:
        for i, mol in enumerate(pybel.readfile(input_ext, inputfile)):
            t = time.time()

            print("**Molecule %d\n..title = %s" % (i, mol.title))
            print("..number of rotatable bonds = %d" % mol.OBMol.NumRotors())
            mol.addh()
            ff.Setup(mol.OBMol)
            ff.DiverseConfGen(rmsd_cutoff, nconfs, energy_cutoff)

            ff.GetConformers(mol.OBMol)
            confdata = pybel.ob.toConformerData(mol.OBMol.GetData(pybel.ob.ConformerData))
            energies = confdata.GetEnergies()

            N = mol.OBMol.NumConformers()
            assert N == len(energies)
            # The count was previously never interpolated, so a literal
            # "%d" was printed.
            print("..generated %d conformers" % N)

            u = time.time()
            # A separate index keeps the molecule counter ``i`` intact.
            for conf_index in range(N):
                mol.OBMol.SetConformer(conf_index)
                outputfile.write(mol)

            print("..(overall time = %.1fs  writing results = %.1fs)" % (time.time() - t,
                                                                      time.time() - u))
            print("\n")
    finally:
        # Close the output even when a molecule fails part-way through.
        outputfile.close()
def annotate_sdf_volume(insdf, outsdf):
    """Copy ``insdf`` to ``outsdf`` adding an ``estimated_volume`` data field.

    The value of the field is ``estimate_volume(mol)`` for each molecule.
    An existing ``outsdf`` is overwritten.
    """
    writer = pybel.Outputfile("sdf", outsdf, overwrite=True)
    for molecule in pybel.readfile("sdf", insdf):
        molecule.data["estimated_volume"] = estimate_volume(molecule)
        writer.write(molecule)
    writer.close()
def recompta(program):
    """Count conformers and conformer files per number of rotatable bonds.

    For every het id in ``hetids`` the files
    ``<path_to_conformers><program>/<hetid>_*.sdf`` are read.  Each file
    is attributed to the largest rotor count found among its molecules; files
    whose molecules disagree on the rotor count are reported and copied to
    ``enrot_changing``.

    Returns ``(num_conf, total_files, program)`` where both dictionaries map
    a rotor count (0..7) to the accumulated number of conformers / files.
    """
    # One counter per rotor count 0..7.
    num_conf = dict.fromkeys(xrange(8), 0)
    total_files = dict.fromkeys(xrange(8), 0)
    for hetid in hetids:
        # Conformer files produced by this tool for this ligand.
        pattern = path_to_conformers + program + '/' + hetid + '_*.sdf'
        for sdf_path in glob(pattern):
            conformers = list(pybel.readfile('sdf', sdf_path))
            nmols = len(conformers)
            # Raises IndexError for a file without molecules.
            first = conformers[0]
            rotor_counts = [conf.OBMol.NumRotors() for conf in conformers]
            enrot = max(rotor_counts)
            minenrot = min(rotor_counts)
            # Conformers of one file should all have the same rotor count.
            if enrot != minenrot:
                print("!!!%s!!!" % sdf_path)
                print("%s vs %s" % (enrot, minenrot))
                shutil.copy(sdf_path, "enrot_changing")
            for key in keys:
                if key != enrot:
                    continue
                # Accumulate under the matching rotor count.
                num_conf[key] += nmols
                total_files[key] += 1
                print("%s %s : %s" % (program, key, num_conf[key]))
                print("%s %s : %s" % (program, key, total_files[key]))
    return num_conf, total_files, program
Exemple #18
0
def fp_mds(fptype):
    """Plot a 2-component PCA map of ``solubility.test.sdf`` for one fingerprint.

    Molecules with an empty fingerprint are ignored.  The set bits of each
    remaining molecule become a row of a 0/1 matrix, which is scaled and
    reduced with ``pca``.  Random dummy activities are interpolated over the
    two components and drawn, together with the points, in the subplot
    selected by the position of ``fptype`` in ``pybel.fps[-4:]``.
    """
    bit_lists = []
    for mol in pybel.readfile('sdf', 'solubility.test.sdf'):
        bits = mol.calcfp(fptype=fptype).bits
        if len(bits) > 0:
            bit_lists.append(bits)

    # One row per molecule; bit numbers are used directly as column indices.
    highest_bit = max(max(entry) for entry in bit_lists)
    mat = np.zeros((len(bit_lists), highest_bit + 1), dtype=np.float32)
    for row, entry in enumerate(bit_lists):
        mat[row, entry] = 1.0

    mat = scale(mat)
    pcs = np.real(pca(mat, npc=2))
    pc1 = pcs[:, 0]
    pc2 = pcs[:, 1]

    activities = np.random.randn(mat.shape[0]) # dummy activity

    # Regular grid of interpolation points spanning both components.
    grid_x = np.linspace(pc1.min(), pc1.max())
    grid_y = np.linspace(pc2.min(), pc2.max())
    xi, yi = np.meshgrid(grid_x, grid_y)

    # Interpolate the activities over the grid.
    rbf = scipy.interpolate.Rbf(pc1, pc2, activities, function='linear', smooth=0.1)
    zi = rbf(xi, yi)

    plt.subplot(2, 2, pybel.fps[-4:].index(fptype) + 1)
    plt.title('%s' % fptype)
    bounds = [pc1.min(), pc1.max(), pc2.min(), pc2.max()]
    plt.imshow(zi, vmin=zi.min(), vmax=zi.max(), origin='lower', cmap='RdYlGn_r', aspect='auto',
            extent=bounds)
    plt.scatter(pc1, pc2, c=activities, cmap='RdYlGn_r')
Exemple #19
0
def main():
    """Write InChI and InChIKey for every molecule of an SDF read from stdin.

    The SDF text is copied to a temporary ``.sdf`` file so pybel can read it
    by name.  For each molecule hydrogens are added, 3D coordinates are
    generated and one tab-separated line (title, InChI without the
    ``InChI=`` prefix, InChIKey) is written to ``args['outfile']``.
    """
    fa = sys.stdin.read().rstrip()
    # delete=False: the file is reopened by name after it has been closed.
    inputTemp = tempfile.NamedTemporaryFile(suffix='.sdf', delete=False)
    inputTempName = inputTemp.name
    try:
        inputTemp.write(fa)
        inputTemp.close()
        with open(args['outfile'], 'w') as of:
            of.write('cid\t')
            labels = 'InChI\tInChIkey'
            of.write(labels + '\n')
            for mol in pybel.readfile('sdf', inputTempName):
                myid = mol.title
                mol.addh()
                mol.make3D()
                conv = ob.OBConversion()
                conv.SetInAndOutFormats("sdf", "inchi")
                inchi = conv.WriteString(mol.OBMol)
                conv.SetInAndOutFormats("sdf", "inchikey")
                inchikey = conv.WriteString(mol.OBMol)
                # NOTE(review): raises AttributeError when the conversion
                # produced no "InChI=..." line -- confirm whether such
                # molecules should be skipped instead.
                info = re.match(r"^InChI=(.*)\n", inchi).group(1)
                of.write(myid.strip() + '\t' + info + '\t' + inchikey + '\n')
    finally:
        # Always release the temporary file, even when a molecule fails;
        # previously it (and the open output handle) leaked on any error.
        inputTemp.close()
        os.unlink(inputTempName)
Exemple #20
0
Fichier : ob.py Projet : DrewG/oddt
def readfile(format, filename, opt=None, lazy=False):
    """Return an iterator over the molecules of ``filename``.

    With ``lazy`` set, 'mol2' and 'sdf' files are handled by this module's
    own readers; every other combination is delegated to
    ``pybel.readfile``.
    """
    if lazy:
        if format == 'mol2':
            return _filereader_mol2(filename, opt=opt)
        if format == 'sdf':
            return _filereader_sdf(filename, opt=opt)
    return pybel.readfile(format, filename, opt=opt)
def step(context):
    """Load the SD test file and store its SMILES strings as the post data."""
    # Pull the contents of our SD test file.
    sdf_path = os.path.join(os.path.dirname(__file__), 'files/behave_sdf.sdf')
    mols = []
    for mol in readfile('sdf', sdf_path):
        # Keep only the text before the first tab of the SMILES output.
        mols.append(mol.write("smi").split("\t")[0])
    print(len(mols))
    context.post_data["type"] = "Smiles"
    context.post_data["objects"] = mols
def step(context, action=None, projkey=None):
    """Assert that every InChI of the reference list has been registered.

    The registered compound batches are fetched through the API client and
    their ``standardInchi`` values are compared with the InChIs read from
    ``files/inchi-list.txt``; the step fails if any reference InChI is
    missing from the registered ones.
    """
    from cbh_core_model.models import Project, CBHCompoundBatch
    from rdkit import Chem
    from rdkit.Chem import AllChem, inchi

    resp = context.api_client.get(
        "/dev/cbh_compound_batches/",
        format='json',
        data=context.post_data,
    )

    # InChIs of the compounds returned by the API.
    reg_cmpds = context.ser.deserialize(resp.content)["objects"]
    reg_inchis = [cmpd['standardInchi'].strip() for cmpd in reg_cmpds]

    # InChIs of the hard-coded reference list.
    fn = os.path.join(os.path.dirname(__file__), 'files/inchi-list.txt')
    inchis = []
    for mol in readfile('inchi', fn):
        inchis.append(mol.write("inchi").split("\t")[0].strip())

    print(len(inchis))
    print(len(reg_inchis))
    # Reference InChIs that were not found among the registered ones.
    missing = set(inchis).difference(reg_inchis)
    print(len(missing))
    assert len(missing) == 0
    def pocketSection(self):
        """Return the pocket section text for the atoms close to the ligand.

        The cleaned PDB text is parsed and each of its lines (except the
        last) is paired with the parsed atom at the same position.  A line is
        kept when its atom lies closer than ``self.threshold`` to any ligand
        atom.  The section starts with a ``PKT <residue count> 1000 <title>``
        header and ends with ``TER``.

        ``self.lig_path`` may be the path of an existing ligand file (its
        extension selects the format) or a ``pybel.Molecule``.

        Raises:
            Exception: when ``self.lig_path`` is neither of the above.
        """
        cleaned = self.__cleanedPdb()
        prt = pybel.readstring("pdb", cleaned)
        lig_source = self.lig_path
        # isinstance() also accepts subclasses, unlike ``type(x) is ...``.
        if isinstance(lig_source, str) and os.path.exists(lig_source):
            suffix = lig_source.split('.')[-1]
            # Built-in next() replaces the Python-2-only ``.next()`` method.
            lig = next(pybel.readfile(suffix, lig_source))
        elif isinstance(lig_source, pybel.Molecule):
            lig = lig_source
        else:
            raise Exception("Wrong input for ligand")

        pkt_lines = []
        residues = set()
        for line, atom in zip(cleaned.split("\n")[:-1], prt.atoms):
            coords = atom.coords
            # The generator lets any() stop at the first ligand atom in range.
            if any(euclidean(coords, a.coords) < self.threshold
                   for a in lig.atoms):
                pkt_lines.append(line)
                # Columns 23-26 of the line hold the residue number.
                residues.add(int(line[22:26]))

        # Fall back to the ligand title when no explicit title was given.
        if self.title == "":
            title = lig.title.split('/')[-1]
        else:
            title = self.title
        start_pkt_line = "\nPKT %d 1000 %s\n" % (len(residues), title)

        return start_pkt_line + "\n".join(pkt_lines) + "\nTER\n"
Exemple #24
0
 def aggregate(self):
     """Merge all ZINC ligands into one SDF, without hydrogens, tagged with MOLID.

     Every ``ZINC*`` file of the required task's output directory is read;
     files that fail to load are reported on stderr and skipped.  The
     molecules are written to ``<subset_work_dir>/<ligand_code>_1.sdf`` (also
     stored in ``self.aggregated_ofn``) with their title copied into the
     ``MOLID`` data field.  A failure while writing is reported on stderr and
     ends the writing.
     """
     untarred_dir = self.requires().output().path
     mols = []
     for zinc in glob(os.path.join(untarred_dir, "ZINC*")):
         try:
             # The file is read completely before anything is added.
             mols.extend(list(pybel.readfile('sdf', zinc)))
         except Exception:
             print("WARNING: ",
                   "Fail to load zinc ligand %s" % zinc,
                   file=sys.stderr)

     ofn = os.path.join(self.subset_work_dir, self.ligand_code + '_1.sdf')
     self.aggregated_ofn = ofn
     ofs = pybel.Outputfile('sdf', ofn, overwrite=True)
     try:
         for mol in mols:
             mol.removeh()
             mol.data['MOLID'] = mol.title
             ofs.write(mol)
     except Exception as detail:
         print("WARNING:", detail, file=sys.stderr)
     finally:
         ofs.close()
Exemple #25
0
def main( args ):
    """Write the molecules of every input file as labelled graphs.

    For each molecule a ``t # id <title>`` header is written to
    ``args.outfile``, followed by one ``v <atom index> <atomic number>`` line
    per atom and one ``e <begin index> <end index> <label>`` line per bond,
    where the label is ``a`` (aromatic), ``s`` (single), ``d`` (double) or
    ``t`` (triple).

    The input format is ``args.format`` when given, otherwise the file
    extension, which must then be one of smi, sdf, inchi or mol (the process
    exits with a message if it is not).

    Raises:
        ValueError: for a bond that is none of the four supported kinds.
    """
    guessable_formats = ('smi', 'sdf', 'inchi', 'mol')
    for infile in args.infile:
        file_extension = args.format or os.path.splitext( infile )[-1].lstrip('.')

        if not args.format and file_extension not in guessable_formats:
            sys.exit('Could not guess the format from the file extension please specify with the --format option.')

        for mol in pybel.readfile(file_extension, infile):
            args.outfile.write( 't # id %s\n' % mol.title.strip() )
            for atom in openbabel.OBMolAtomIter( mol.OBMol):
                args.outfile.write('v %s %s\n' % (atom.GetIdx(), atom.GetAtomicNum()))

            for bond in openbabel.OBMolBondIter( mol.OBMol):
                src_index = bond.GetBeginAtomIdx()
                dest_index = bond.GetEndAtomIdx()
                assert(src_index > 0)
                assert(dest_index > 0)
                if bond.IsAromatic():
                    bond_label = 'a'
                elif bond.IsSingle():
                    bond_label = 's'
                elif bond.IsDouble():
                    bond_label = 'd'
                elif bond.IsTriple():
                    bond_label = 't'
                else:
                    # Previously such a bond silently reused a stale label
                    # (the last atom's atomic number or the previous bond's
                    # label), writing a wrong edge without any warning.
                    raise ValueError(
                        'Unsupported bond type between atoms %s and %s in %s'
                        % (src_index, dest_index, mol.title.strip()))
                args.outfile.write('e %s %s %s\n' % (src_index, dest_index, bond_label))
def main():
    """Write the requested descriptors of every molecule of an SDF from stdin.

    The SDF text is copied to a temporary ``.sdf`` file so pybel can read it
    by name.  The output (``args['outfile']``) is comma separated: a header
    ``cid,<properties...>`` followed by one line per molecule holding its
    title and the value of each descriptor listed in ``properties``.
    Hydrogens are added and 3D coordinates generated before the descriptors
    are calculated.
    """
    fa = sys.stdin.read().rstrip()
    # delete=False: the file is reopened by name after it has been closed.
    inputTemp = tempfile.NamedTemporaryFile(suffix='.sdf', delete=False)
    inputTempName = inputTemp.name
    try:
        inputTemp.write(fa)
        inputTemp.close()
        with open(args['outfile'], 'w') as of:
            of.write('cid,')
            # join() replaces the build-then-strip-trailing-comma regex and
            # no longer fails when ``properties`` is empty.
            of.write(','.join(properties) + '\n')
            for mol in pybel.readfile('sdf', inputTempName):
                myid = mol.title
                mol.addh()
                mol.make3D()
                desc = mol.calcdesc()
                info = ','.join(str(desc[thisdesc]) for thisdesc in properties)
                of.write(myid.strip() + ',' + info + '\n')
    finally:
        # Always release the temporary file, even when a molecule fails;
        # previously it (and the open output handle) leaked on any error.
        inputTemp.close()
        os.unlink(inputTempName)
def getRingAtomsMulti(molfilePath):
	"""Return a text report of the rings and atoms found in a MOL file.

	The report lists the ring count, then for every ring its size followed
	by its path entries minus one (one per line), then the atom count and
	one line per atom with: running index (from 0), number of rings the atom
	belongs to, ring size, hybridization, implicit hydrogen count,
	aromaticity flag and clockwise-chirality flag.

	When the file holds several molecules only the report of the last one is
	returned; an empty string is returned when it holds none.
	"""
	# Start with an empty report so a file without molecules no longer
	# raises UnboundLocalError at the return statement.
	outString = ""
	for m in pybel.readfile("mol", molfilePath):
		rings = m.OBMol.GetSSSR()
		lines = ["Ring count " + str(sum(1 for _ in rings))]
		for r in rings:
			lines.append("Ring size " + str(r.Size()))
			lines.extend(str(p - 1) for p in r._path)

		lines.append("Atom count " + str(m.OBMol.NumAtoms()))
		lines.append("Index HowManyRings RingSize Hybridization Hydro_count Aromaticity AntiClockwise_chiral")
		for i, a in enumerate(openbabel.OBMolAtomIter(m.OBMol)):
			fields = [
				str(i),
				str(a.MemberOfRingCount()),
				str(a.MemberOfRingSize()),
				str(a.GetHyb()),
				str(a.ImplicitHydrogenCount()),
				"1" if a.IsAromatic() else "0",
				"1" if a.IsClockwise() else "0",
			]
			lines.append(" ".join(fields))

		# Every report line is newline-terminated, including the last one.
		outString = "\n".join(lines) + "\n"

	return outString
def read_file(filename, name=None, format=None):
    """ Read a molecule from a file

    Note:
        Currently only reads the first conformation in a file

    Args:
        filename (str): path to file
        name (str): name to assign to molecule; defaults to ``filename``
            (its base name is used) when the file is parsed locally
        format (str): File format: pdb, sdf, mol2, bbll, etc.  Defaults to
            the text after the last dot of ``filename``

    Returns:
        moldesign.Molecule: parsed result
    """
    # TODO: check for openbabel molecule name?
    if format is None:
        format = filename.split('.')[-1]

    if force_remote:
        # Hand the raw file contents to the string reader.
        with open(filename, 'r') as infile:
            mol = read_string(infile.read(), format, name=name)
        return mol
    else:
        # Built-in next() replaces the Python-2-only ``.next()`` method.
        pbmol = next(pb.readfile(format=format, filename=filename))
        if name is None: name = filename
        mol = pybel_to_mol(pbmol, name=os.path.basename(name))
        mol.filename = filename
        return mol
Exemple #29
0
    def readatoms(self, frame):
        """Return the atoms of frame number ``frame`` (counted from 0).

        The file format is taken from the extension of ``self.path``.

        Raises:
            IndexError: when the file has no such frame.
            IOError: propagated unchanged from reading the file.
            FileError: for any other problem while reading the atom data.
        """
        try:
            if self.info.num_frames <= frame:
                raise IndexError("Frame {} not found".format(frame))

            file_extension = os.path.splitext(self.path)[1][1:]
            mol_iter = pybel.readfile(file_extension.encode('utf8'),
                                      self.path.encode('utf8'))

            # Skip the preceding frames, then take the requested one.  The
            # built-in next() replaces the Python-2-only ``.next()`` method.
            try:
                for _ in range(frame):
                    next(mol_iter)
                mol = next(mol_iter)
            except StopIteration:
                raise IndexError("Frame {} not found".format(frame))

            # Collect position and element symbol of every atom.
            symbols = []
            positions = []
            for atom in mol.atoms:
                positions.append(tuple(float(c) for c in atom.coords))
                symbols.append(core.elements.symbols[atom.atomicnum])
            return data.Atoms(positions, None, symbols, self.info.volume)
        except (IOError, IndexError):
            raise
        except Exception as e:
            raise FileError("Cannot read atom data.", e)
Exemple #30
0
def run():
	"""Predict a value for the molecule file named by ``sys.argv[1]``.

	Fourteen pybel descriptors plus the number of matches of the SMARTS
	patterns ``[+]`` and ``[-]`` are collected for every molecule into one
	flat tuple, which is passed to the model loaded from
	``volume_model/volume.pkl``.  The first prediction, rounded to two
	decimals, is returned.
	"""
	descriptor_names = (
		'TPSA', 'HBD', 'logP', 'MW', 'tbonds', 'nF', 'bonds', 'atoms',
		'HBA1', 'HBA2', 'sbonds', 'dbonds', 'MR', 'abonds',
	)
	# The file format is the text after the last dot of the file name.
	inputfile = pybel.readfile(sys.argv[1].split(".")[-1], sys.argv[1])
	value = ()
	for mol in inputfile:
		descvalues = mol.calcdesc()
		value += tuple(descvalues.get(key) for key in descriptor_names)
		# Append the match counts of the two charge patterns, in this order.
		for pattern in ("[+]", "[-]"):
			value += (len(pybel.Smarts(pattern).findall(mol)),)

	model = joblib.load('volume_model/volume.pkl')
	# Only the first prediction is used.
	for result in model.predict(value):
		return round(result, 2)
Exemple #31
0
def get_dihedrals(fname, a1, a2, a3):
    """Print the a1-a2-a3 angle of every molecule in ``fname``.

    Despite the function name, the value printed is the three-atom angle
    returned by ``OBMol.GetAngle``.  ``a1``, ``a2`` and ``a3`` are passed to
    ``OBMol.GetAtom``; the file format is taken from the file extension.
    """
    file_format = os.path.splitext(fname)[1][1:]
    # Read all the molecules from the file.
    for mol in pybel.readfile(file_format, fname):
        obmol = mol.OBMol
        first, middle, last = (obmol.GetAtom(index) for index in (a1, a2, a3))
        print(obmol.GetAngle(first, middle, last))
Exemple #32
0
import sys
import copy

import pybel
import openbabel

from correct_sdf import *

if __name__ == "__main__":
    # Usage: <script> <nbo file> <xyz file>
    # Rebuilds the bonds of the XYZ geometry from the NBO analysis and sums
    # the per-atom formal charges as a consistency check.

    nbo_filename = sys.argv[1]
    xyz_filename = sys.argv[2]

    bonds, charges, total_charge = get_bonds_nbo(nbo_filename)
    # Built-in next() replaces the Python-2-only ``.next()`` method; only the
    # first geometry of the file is used.
    mol = next(pybel.readfile("xyz", xyz_filename))

    mol = delete_bonds_from_mol(mol)

    mol_corrected = add_bonds_to_mol(mol, bonds)

    total_charge_check = 0

    for i, atom in enumerate(mol):

        # Formal charge = nuclear charge minus the charge value read from the
        # NBO file for the atom at the same position.
        nuc = atom.OBAtom.GetAtomicNum()
        formal_charge = nuc - charges[i]

        total_charge_check += formal_charge
Exemple #33
0
# - removes all fields with ligprep info
# - adds a field "id" with the vendor code
#####################################################################

import pybel, sys

infile = sys.argv[1]
outfile = sys.argv[2]

## remove "M CHG  0" error lines
nochg_file = infile.replace(".sdf", "_noCHG0.sdf")
if nochg_file == infile:
    # Without ".sdf" in the name the intermediate file would be the input
    # itself, and opening it for writing would truncate it before reading.
    sys.exit("Input file name must contain '.sdf': %s" % infile)
# Context managers close both files even if copying fails.
with open(infile) as sdf_in, open(nochg_file, "w") as out_nochg:
    for line in sdf_in:
        if "CHG  0" not in line:
            out_nochg.write(line)

## copy the title (first line ID) of each molecule into a field "id"
#open input file (without CHG 0 lines) and output sdf file for final output file
input_sdf = pybel.readfile("sdf", nochg_file)
output_sdf = pybel.Outputfile("sdf", outfile)

#loop for each molecule
#remove all existing data fields and add field "id" holding the title
try:
    for mol in input_sdf:
        mol.data.clear()
        mol.data["id"] = mol.title
        output_sdf.write(mol)
finally:
    # Close the output even when a molecule cannot be written.
    output_sdf.close()
def remove_protonation( args ):
    """Copy ``args.input`` to ``args.output`` with all formal charges set to 0.

    Both files use the format ``args.iformat``; an existing output file is
    overwritten.
    """
    writer = pybel.Outputfile(args.iformat, args.output, overwrite=True)
    for molecule in pybel.readfile(args.iformat, args.input):
        for atom in molecule.atoms:
            atom.OBAtom.SetFormalCharge(0)
        writer.write( molecule )
    writer.close()
# Remaining command-line options; ``parser`` is created before this point.
parser.add_argument("-glide_ranking",
                    required=True,
                    help=variables_for_actives_recovery.glide_ranking_help)
parser.add_argument(
    "-simulations_report",
    required=True,
    help=variables_for_actives_recovery.simulations_report_help)
parser.add_argument("-output_file",
                    default=variables_for_actives_recovery.output_default_name,
                    help=variables_for_actives_recovery.output_file_help)
args = parser.parse_args()

print 'processing mol2 files'
# Actives: the file format is the text after the last dot of the file name.
# "variants" holds the title of every molecule in the file, "compounds" the
# distinct titles (in arbitrary order, because they pass through a set).
format_actives_file = args.actives_file.split('.')[-1]
total_actives_variants = [
    mol.title for mol in py.readfile(format_actives_file, args.actives_file)
]
total_actives_compounds = list(set(total_actives_variants))

# Inactives: same treatment as the actives.
format_inactives_file = args.inactives_file.split('.')[-1]
total_inactives_variants = [
    mol.title
    for mol in py.readfile(format_inactives_file, args.inactives_file)
]
total_inactives_compounds = list(set(total_inactives_variants))

# Totals are stored as floats.
total_compounds = float(
    len(total_actives_compounds) + len(total_inactives_compounds))
total_variants = float(
    len(total_actives_variants) + len(total_inactives_variants))
Exemple #36
0
def smi_split(file=""):
    """Split a SMILES file into one file per molecule.

    Molecule number ``i`` (counted from 0) is written to ``<i>.smi`` in the
    current directory.

    Args:
        file: path of the SMILES file to split.  When empty (the default)
            ``zz.smi`` is read, which is what the function previously did
            regardless of this argument.
    """
    # Honour the argument instead of always reading the hard-coded file.
    source = file or "zz.smi"
    for i, mol in enumerate(pybel.readfile("smi", source)):
        mol.write("smi", "%d.smi" % i)
Exemple #37
0
def _export_structure(mol, handle, cid, images, write_mol):
    """Write the optional PNG drawing and SDF file of *mol* next to *handle*.

    Both files are named after *cid* and placed in the directory of
    ``handle.name``. Exceptions are not handled here; the caller decides
    what to do with them.
    """
    if images:
        mol.draw(
            show=False,
            update=True,
            usecoords=True,
            filename=os.path.join(os.path.dirname(handle.name), cid + ".png"),
        )
    if write_mol:
        mol.write(
            "sdf",
            os.path.join(os.path.dirname(handle.name), cid + ".sdf"),
        )


def run(input_path,
        id_string,
        is_dir,
        inchi,
        images,
        write_mol,
        consisting_of_isoprens_required,
        ):
    """Sort the molecules of a mol file into output files by largest component size.

    For each molecule one tab-separated line (id, ring count and, when
    *inchi* is set, the InChI) is written to the file handle selected by
    the value returned from ``get_largest_component_length``:

    * not a multiple of 5: ``"potential_terpenoids"``
    * multiple of 5 and greater than 40: ``"polyterpene"``
    * 0: nothing is written
    * any other multiple of 5: the handle keyed by that number

    When *consisting_of_isoprens_required* is set, molecules in the
    multiple-of-5 case that fail ``consisting_of_isoprens`` are skipped.
    *images* / *write_mol* additionally request a PNG / SDF per molecule in
    the directory of the chosen handle; failures there are not fatal.
    """
    print("Processing:\t%s" % input_path, id_string)
    file_handles = create_file_handles(id_string, is_dir)

    for mol in pybel.readfile("mol", input_path):
        graph = mol_to_networkxgraph(mol)
        largest_component = get_largest_component_length(graph)
        cid = mol.data[id_string]
        rings = len(mol.sssr)
        if inchi:
            out = "%s\t%s\t%s\n" % (cid, rings, mol.write("inchi").strip())
        else:
            out = "%s\t%s\n" % (cid, rings)

        if largest_component % 5 == 0:
            # a multiple of 5 carbon atoms
            if consisting_of_isoprens_required and not consisting_of_isoprens(mol):
                continue

            if largest_component > 40:
                handle = file_handles["polyterpene"]
                handle.write(out)
                try:
                    _export_structure(mol, handle, cid, images, write_mol)
                except Exception:
                    print("no image for %s" % cid)
            elif largest_component == 0:
                # nothing is recorded for an empty largest component
                pass
            else:
                handle = file_handles[largest_component]
                handle.write(out)
                try:
                    _export_structure(mol, handle, cid, images, write_mol)
                except Exception:
                    # export failures are silently ignored in this branch
                    pass
        else:
            # __not__ a multiple of 5 carbon atoms
            file_handles["potential_terpenoids"].write(out)
            """
Exemple #38
0
def sdfVox(name, activeMatrix, trans, d, l, f):
    """
        Voxelize every molecule of an sdf file and append the results.

        The file `name` is read from `posesPath`. For each molecule the
        atom coordinates are shifted with `trans`, an electron cloud read
        from `cloudPath` is placed around every atom, and the result is
        combined with `activeMatrix` through `voxData`.

        name:         sdf file name, relative to posesPath
        activeMatrix: active site matrix handed to voxData
        trans:        sequence of three offsets; each is negated and passed
                      with the matching coordinate to addRoundHundredth
        d, l, f:      objects with an appendVal method; they receive the
                      voxelized matrix, the 'minimizedAffinity' value as
                      float32 and the file name, respectively

        The working directory is changed during the call and is posesPath
        on normal return.
    """
    os.chdir(posesPath)
    molList = list(pybel.readfile('sdf', name))
    print(len(molList))

    # For every molecule from the sdf file, go through voxelization process
    for mol in molList:
        # Transform the nuclei by the same transformation as the active site
        coords = []  # nucleus xyz location
        aNum = []    # elements atomic number
        for atom in mol:
            aNum.append(atom.atomicnum)
            coords.append(atom.coords)

        transformedNuclei = [
            (addRoundHundredth(xyz[0], -trans[0]),
             addRoundHundredth(xyz[1], -trans[1]),
             addRoundHundredth(xyz[2], -trans[2]))
            for xyz in coords
        ]

        # Place an electron cloud around each ligand atom. The cloud of an
        # element is stored as comma-separated offsets in
        # "<atom type>.txt" inside cloudPath.
        os.chdir(cloudPath)
        transformedElectrons = []
        for nucleus, atomicNumber in zip(transformedNuclei, aNum):
            # "with" closes the cloud file; it used to stay open.
            with open(getAtomType(atomicNumber) + ".txt", 'r') as cloudFile:
                for line in cloudFile:
                    split = [x.strip() for x in line.split(',')]
                    transformedElectrons.append((
                        addRoundHundredth(nucleus[0], float(split[0])),
                        addRoundHundredth(nucleus[1], float(split[1])),
                        addRoundHundredth(nucleus[2], float(split[2])),
                        gNum(atomicNumber)))

        # Add the ligand information to the protein active site matrix in a
        # manner that simulates the docked pose.
        # NOTE(review): activeMatrix is passed as-is (no copy), exactly as
        # before; confirm that voxData does not modify it in place.
        dockedLigandMatrix = voxData(activeMatrix, transformedElectrons)

        # Append all voxelized values to the output containers
        molEnergy = mol.data['minimizedAffinity']
        outEnergy = np.asarray(molEnergy, dtype = np.float32)
        d.appendVal(dockedLigandMatrix)  #Appends matrix
        l.appendVal(outEnergy)           #Appends energy
        f.appendVal(np.string_(name))    #Appends file name
    os.chdir(posesPath)
#!/usr/bin/python

import pybel, openbabel, glob, sys, os

# Command-line arguments without the script name (sys.argv itself is modified).
lis = sys.argv
del lis[0]

for x in sorted(lis):
    for mol in pybel.readfile("g09", x):
        for ring in mol.sssr:
            # Arguments shared by both commands: every entry of the ring
            # path, followed once more by the first entry.
            ring_args = "".join(" " + str(atom) for atom in list(ring._path))
            first_again = " " + str(ring._path[0])

            homaring = "HOMA.pl " + x + ring_args
            fluring = "FLU.pl " + x + ring_args

            print(homaring + first_again)
            os.system(homaring + first_again)
            os.system(fluring + first_again)
Exemple #40
0
def convert_input_molecule(argv):
    """Return the first molecule of ``argv.input`` as a pybel molecule.

    The file is read with the format ``argv.informat`` when that attribute
    is set (truthy) and with ``DEFAULT_INPUT`` otherwise.

    Raises StopIteration when the file contains no molecule.
    """
    informat = argv.informat or DEFAULT_INPUT
    # The next() builtin works on Python 2.6+ and 3; the generator method
    # .next() exists on Python 2 only.
    return next(pybel.readfile(informat, argv.input))
Exemple #41
0
def get_dihedrals(fname, a1, a2, a3, a4):
    """Print the a1-a2-a3-a4 torsion of every molecule in *fname*.

    The file format is the extension of *fname* without the leading dot.
    """
    extension = os.path.splitext(fname)[1]
    file_format = extension[1:]
    for mol in pybel.readfile(file_format, fname):
        torsion = mol.OBMol.GetTorsion(a1, a2, a3, a4)
        print("%f" % torsion)
Exemple #42
0
def main(argv):
    """Draw the first molecule of a MOPAC output file.

    The file name is taken (and removed) from the end of *argv*.
    """
    # next() instead of the Python-2-only generator method .next()
    mol = next(pybel.readfile("mopout", argv.pop()))
    mol.draw()
Exemple #43
0
def log2xyz(logfile):
    """Convert the first molecule of a Gaussian log file to an xyz file.

    The output is written next to the input with the extension replaced by
    ".xyz"; an existing output file is overwritten.
    """
    import os

    # next() instead of the Python-2-only generator method .next()
    mol = next(pybel.readfile("g09", logfile))
    # Only the final extension is swapped. str.replace() would also rewrite
    # ".log" inside directory names, and would leave the path unchanged
    # (overwriting the input) for files that do not contain ".log".
    xyzfile = os.path.splitext(logfile)[0] + ".xyz"
    mol.write("xyz", xyzfile, overwrite=True)
Exemple #44
0
 def __default_load(self, fileName, fileExtension):
     """Read fileName in the format fileExtension with pybel.readfile.

     Returns all molecules of the file as a list of pybel molecules.
     """
     molecules = pybel.readfile(fileExtension, fileName)
     return list(molecules)
Exemple #45
0
import pybel
inputfile = pybel.readfile("smi", "clearance_fixpka.smiles")

num_molecule = 0

# Compile the SMARTS patterns once instead of re-reading and re-compiling
# "fp_noduplicate.smi" for every molecule. A line is used when it has a ":"
# after its first character; the pattern is the text between the first and
# the last single quote of that line, and empty patterns are skipped.
smarts_patterns = []
with open("fp_noduplicate.smi", 'r') as maccsfile:
    for line_maccs in maccsfile:
        if line_maccs.find(":") > 0:
            line_maccs = line_maccs[line_maccs.find("'") +
                                    1:line_maccs.rfind("'")]
            if len(line_maccs) > 0:
                smarts_patterns.append(pybel.Smarts(line_maccs))

# One output line per molecule: the title followed by the number of matches
# of every pattern, each value followed by a single space.
with open("clearance_fingerprint.txt", 'w') as outfile:
    for mol in inputfile:
        outfile.write(mol.title)
        outfile.write(" ")

        for smarts in smarts_patterns:
            outfile.write(str(len(smarts.findall(mol))))
            outfile.write(" ")

        outfile.write("\n")
def get_coords(ac_mol2_file):
    """Return the atom coordinates of a mol2 file as a numpy array.

    The file is read with PandasMol2; each atom row of its data frame
    contributes one [x, y, z] entry, in data-frame order.
    """
    pmol = PandasMol2().read_mol2(ac_mol2_file)
    xyz_rows = [[row.x, row.y, row.z] for row in pmol.df.itertuples()]
    return np.array(xyz_rows)

# Script entry point: for every SDF data set, load the stored coordinates of
# each record and build a 3D structure from the record's SMILES string.
if __name__=="__main__":


    for idx, sdf_dataset in enumerate(DATA_SETS):
        # NOTE(review): logp_dataset is created per data set but not filled
        # in the part of the script visible here.
        logp_dataset = dict()
        database = pybel.readfile('sdf', sdf_dataset)
        # read the molecules in the sdf file
        for sd_record in database:

            # The mol2 file of a record is "<MOLECULEID>.mol2" under the
            # path prefix that belongs to this data set.
            mol_id = sd_record.data['MOLECULEID']
            file_path = mol2_file_path[idx] + mol_id+'.mol2'
            molecule_coords = get_coords(file_path)


            # Build a new molecule from the SMILES field of the record
            # (molecule.data.keys() gives all the properties)
            molecule = pybel.readstring("smi", sd_record.data['SMILES'])
            # add hydrogens
            molecule.OBMol.AddHydrogens()
            # generate 3D coordinates, then minimize the energy, both with
            # the "gaff" force field and STEPS steps
            molecule.make3D(forcefield="gaff", steps=STEPS)
            molecule.localopt(forcefield="gaff", steps=STEPS)
Exemple #47
0
    def assign(self, write=False, outfn='tmp.pdb'):
        """Assign a pharmacophore type to every atom of the structure.

        The structure is the first molecule of the file ``self.fn``; the
        file format is the extension of ``self.fn``. Atom types are
        translated from the Open Babel internal type to upper-case Sybyl
        types, and the pharmacophore label is derived from the Sybyl type
        and the bonded neighbours. The rules are commented in the code.

        Results are added to ``self.AtomIdx`` and ``self.AtomPharma``, so
        calling the method again on the same instance appends the atom
        indices again.

        Parameters
        ----------
        write : bool
            If true, delete the atoms whose element is not in the element
            list and write the remaining structure to ``outfn`` as PDB.
        outfn : str
            Output PDB file name with default 'tmp.pdb'. An existing file
            is overwritten.

        Returns
        ---------
        AtomIdx : list
            ``self.AtomIdx``: atom indices in the structure
        AtomPharma : dict
            ``self.AtomPharma``: atom index as key and
            ``[atomic number, pharmacophore type, coords]`` as value

        """
        # Nine elements are used in the study: C, N, O, F, P, S, Cl, Br, I
        elementint = [6, 7, 8, 9, 15, 16, 17, 35, 53]

        # suppress the logging information
        pybel.ob.obErrorLog.StopLogging()

        # table to convert OB internal atom types to Sybyl
        ttab = pybel.ob.OBTypeTable()
        ttab.SetFromType("INT")
        ttab.SetToType("SYB")

        # read in the molecule; next() replaces the Python-2-only .next()
        __, ft = os.path.splitext(self.fn)
        mol = next(pybel.readfile(ft[1:], self.fn))

        # convert the atom type from internal to sybyl, in upper case
        for atom in mol.atoms:
            at = ttab.Translate(atom.OBAtom.GetType())
            at = at.upper()
            atom.OBAtom.SetType(at)

        # assign pharmacophore type
        for atom in mol.atoms:
            # append atom idx to the AtomIdx
            self.AtomIdx.append(atom.idx)
            at = atom.type

            # pharma type for element not in C,N,O,P,S,F,Cl,Br,I
            if atom.atomicnum not in elementint:
                p = 'NU'

            # pharma type for oxygen
            elif at in ['O.3', 'O.2', 'O.CO2']:
                p = 'A'
                nbrs = list(ob.OBAtomAtomIter(atom.OBAtom))
                if len(nbrs) == 2:
                    # two neighbours: 'DA' when one of them is hydrogen
                    for nbr in ob.OBAtomAtomIter(atom.OBAtom):
                        if nbr.GetAtomicNum() == 1:
                            p = 'DA'

                elif len(nbrs) == 1:
                    nbr = nbrs[0]

                    # neighbour is C or P: 'N' when it carries at least two
                    # O/S atoms that have no other neighbour (e.g. COO-)
                    if nbr.GetAtomicNum() in [6, 15]:
                        c = 0
                        for nbr2 in ob.OBAtomAtomIter(nbr):
                            if nbr2.GetAtomicNum() in [8, 16]:
                                if len(list(ob.OBAtomAtomIter(nbr2))) == 1:
                                    c += 1
                        if c >= 2:
                            p = 'N'

                    # neighbour is S: 'N' when it carries at least three
                    # oxygens that have no other neighbour
                    elif nbr.GetAtomicNum() == 16:
                        c = 0
                        for nbr2 in ob.OBAtomAtomIter(nbr):
                            if nbr2.GetAtomicNum() == 8:
                                if len(list(ob.OBAtomAtomIter(nbr2))) == 1:
                                    c += 1
                        if c >= 3:
                            p = 'N'

            # pharma type for nitrogen
            elif at == 'N.4':
                p = 'P'
            elif at == 'N.3':
                p = 'A'
                for nbr in ob.OBAtomAtomIter(atom.OBAtom):
                    if nbr.GetAtomicNum() == 1:
                        p = 'DA'
                        break
            elif at == 'N.2':
                p = 'A'
                nbrs = list(ob.OBAtomAtomIter(atom.OBAtom))
                if len(nbrs) == 3:
                    p = 'P'
                else:
                    for nbr in ob.OBAtomAtomIter(atom.OBAtom):
                        if nbr.GetAtomicNum() == 1:
                            p = 'DA'
            elif at == 'N.1':
                p = 'A'
            elif at == 'N.AR':
                p = 'AR'
                nbrs = list(ob.OBAtomAtomIter(atom.OBAtom))
                if len(nbrs) == 3:
                    for nbr in ob.OBAtomAtomIter(atom.OBAtom):
                        if nbr.GetAtomicNum() == 1:
                            p = 'D'
                elif len(nbrs) == 2:
                    p = 'A'
            elif at == 'N.AM':
                p = 'PL'
                for nbr in ob.OBAtomAtomIter(atom.OBAtom):
                    if nbr.GetAtomicNum() == 1:
                        p = 'D'
            elif at == 'N.PL3':
                p = 'A'
                for nbr in ob.OBAtomAtomIter(atom.OBAtom):
                    # a C.CAT neighbour takes precedence over hydrogen
                    if nbr.GetType() == 'C.CAT':
                        p = 'P'
                        break
                    elif nbr.GetAtomicNum() == 1:
                        p = 'DA'

            # pharma type for sulfur
            elif at in ['S.3', 'S.2', 'S.O', 'S.O2']:
                p = 'PL'
                nbrs = list(ob.OBAtomAtomIter(atom.OBAtom))
                if len(nbrs) == 1:
                    p = 'A'
                    nbr = nbrs[0]
                    if nbr.GetAtomicNum() == 6:
                        nbrs2 = list(ob.OBAtomAtomIter(nbr))
                        if len(nbrs2) == 4:
                            p = 'N'
                        elif len(nbrs2) == 3:
                            # carbon with at least two O/S atoms that have
                            # no other neighbour
                            c = 0
                            for nbr3 in ob.OBAtomAtomIter(nbr):
                                if nbr3.GetAtomicNum() in [8, 16]:
                                    if len(list(ob.OBAtomAtomIter(nbr3))) == 1:
                                        c += 1
                            if c >= 2:
                                p = 'N'
                elif len(nbrs) == 2:
                    p = 'A'
                    for nbr in ob.OBAtomAtomIter(atom.OBAtom):
                        if nbr.GetAtomicNum() == 1:
                            p = 'DA'

            # pharma type for carbon
            elif at == 'C.AR':
                p = 'AR'
            elif at in ['C.1', 'C.2', 'C.3', 'C.CAT']:
                p = 'H'
                for nbr in ob.OBAtomAtomIter(atom.OBAtom):
                    if nbr.GetAtomicNum() in [7, 8, 9, 15, 16]:
                        p = 'PL'
                        break

            # pharma type for P and Halogen
            elif at == 'P.3':
                p = 'PL'
            elif at in ['F', 'CL', 'BR', 'I']:
                p = 'HA'

            # pharma type for general carbon not assigned above
            elif atom.atomicnum == 6:
                p = 'H'
            # pharma type for N,O,F,S,P,Cl,Br,I not assigned above
            elif atom.atomicnum in elementint:
                p = 'PL'

            # AtomPharma dict with atomicnum, pharma type, and coords
            # the coords is for SADE only
            self.AtomPharma[atom.idx] = [atom.atomicnum, p, atom.coords]

        if write:
            # walk the indices backwards while deleting atoms
            for idx in self.AtomIdx[::-1]:
                if self.AtomPharma[idx][0] not in elementint:
                    mol.OBMol.DeleteAtom(mol.OBMol.GetAtom(idx))

            output = pybel.Outputfile("pdb", outfn, overwrite=True)
            output.write(mol)
            output.close()

        return self.AtomIdx, self.AtomPharma
def extract_docking_poses(ligands_dict, no_checks=False, verbosity=0):
    """Read the docking pose of every ligand and return them as a dict.

    Each value of ligands_dict may be a file name (str), an
    all_classes.Namespace with ``format`` and ``data`` attributes, a dict
    with a ``'molecule'`` entry (an rdkit.Chem.Mol or a file name, plus an
    optional ``'format'``), or an rdkit.Chem.Mol. Files and Namespace data
    are read with generic_mol_read. Every molecule that was read is passed
    through mol_util.process_dummy_atoms before it is stored.

    When the read yields None and no_checks is set, reading is retried with
    openbabel (pybel) for str and Namespace inputs. If that fails with
    OSError or StopIteration as well, an error is logged and the ligand is
    left out of the result.

    :param dict ligands_dict: dict containing docking poses
    :param bool no_checks: ignore checks and tries to go on
    :param int verbosity: be verbosity
    :raises TypeError: if a ligand has an unsupported type
    :raises SystemExit: if a ligand cannot be read and no_checks is False
    :rtype: dict
    """

    os_util.local_print(
        'Entering extract_docking_poses(poses_data={}, verbosity={})'
        ''.format(ligands_dict, verbosity),
        msg_verbosity=os_util.verbosity_level.debug,
        current_verbosity=verbosity)

    os_util.local_print('{:=^50}\n{:<15} {:<20}'.format(
        ' Poses read ', 'Name', 'File'),
                        msg_verbosity=os_util.verbosity_level.default,
                        current_verbosity=verbosity)

    docking_mol_local = {}
    for each_name, each_mol in ligands_dict.items():

        # First attempt: obtain an rdkit molecule, depending on input type
        if isinstance(each_mol, str):
            ligand_format = splitext(each_mol)[1].lower()
            docking_mol_rd = generic_mol_read(ligand_format,
                                              each_mol,
                                              verbosity=verbosity)
        elif isinstance(each_mol, all_classes.Namespace):
            docking_mol_rd = generic_mol_read(each_mol.format,
                                              each_mol.data,
                                              verbosity=verbosity)
        elif isinstance(each_mol, dict):
            if isinstance(each_mol['molecule'], rdkit.Chem.Mol):
                docking_mol_rd = each_mol['molecule']
            else:
                # Stores the derived format in the dict when none was given
                ligand_format = each_mol.setdefault(
                    'format',
                    os.path.splitext(each_mol['molecule'])[1])
                docking_mol_rd = generic_mol_read(ligand_format,
                                                  each_mol['molecule'],
                                                  verbosity=verbosity)
        elif isinstance(each_mol, rdkit.Chem.Mol):
            docking_mol_rd = each_mol
        else:
            os_util.local_print(
                "Could not understand type {} (repr: {}) for your ligand {}"
                "".format(type(each_mol), repr(each_mol), each_name),
                current_verbosity=verbosity,
                msg_verbosity=os_util.verbosity_level.error)
            raise TypeError('Ligand must be str or all_classes.Namespace')

        if docking_mol_rd is not None:
            os_util.local_print("Read molecule {} from {}"
                                "".format(each_name, each_mol),
                                current_verbosity=verbosity,
                                msg_verbosity=os_util.verbosity_level.info)
            docking_mol_rd = mol_util.process_dummy_atoms(docking_mol_rd,
                                                          verbosity=verbosity)

            docking_mol_local[each_name] = docking_mol_rd

            os_util.local_print('{:<15} {:<18}'.format(each_name,
                                                       str(each_mol)),
                                msg_verbosity=os_util.verbosity_level.default,
                                current_verbosity=verbosity)
            os_util.local_print('Read molecule {} (SMILES: {}) from file {}'
                                ''.format(
                                    each_name,
                                    rdkit.Chem.MolToSmiles(docking_mol_rd),
                                    each_mol),
                                msg_verbosity=os_util.verbosity_level.debug,
                                current_verbosity=verbosity)

        elif no_checks:
            os_util.local_print(
                'Could not read data in {} using rdkit. Falling back to openbabel. It is strongly '
                'advised you to check your file and convert it to a valid mol2.'
                ''.format(str(each_mol)),
                msg_verbosity=os_util.verbosity_level.warning,
                current_verbosity=verbosity)
            import pybel

            if verbosity <= 3:
                pybel.ob.obErrorLog.SetOutputLevel(pybel.ob.obError)
            try:
                # isinstance() keeps these checks consistent with the type
                # dispatch at the top of the loop.
                if isinstance(each_mol, str):
                    ligand_format = splitext(each_mol)[1].lstrip('.').lower()
                    docking_mol_ob = next(pybel.readfile(ligand_format,
                                                         each_mol))
                elif isinstance(each_mol, all_classes.Namespace):
                    docking_mol_ob = pybel.readstring(each_mol.format,
                                                      each_mol.data)
                else:
                    os_util.local_print(
                        "Could not understand type {} (repr: {}) for your ligand {}"
                        "".format(type(each_mol), repr(each_mol), each_name))
                    raise TypeError(
                        'Ligand must be str or all_classes.Namespace')
            except (OSError, StopIteration) as error_data:
                # no_checks is set in this branch, so the failure is only
                # reported and the ligand is skipped.
                os_util.local_print(
                    'Could not read your ligand {} from {} using rdkit nor openbabel. Please '
                    'check/convert your ligand file. Openbabel error was: {}'
                    ''.format(each_name, str(each_mol), error_data),
                    msg_verbosity=os_util.verbosity_level.error,
                    current_verbosity=verbosity)
            else:
                # Convert to an rdkit molecule to apply
                # mol_util.process_dummy_atoms
                docking_mol_rd = mol_util.process_dummy_atoms(
                    mol_util.obmol_to_rwmol(docking_mol_ob))
                docking_mol_local[each_name] = docking_mol_rd

                os_util.local_print(
                    '{:<15} {:<18}'
                    ''.format(
                        each_name, each_mol['comment'] if isinstance(
                            each_mol, dict) else each_mol),
                    msg_verbosity=os_util.verbosity_level.default,
                    current_verbosity=verbosity)
                os_util.local_print(
                    'Extracted molecule {} (SMILES: {}) using openbabel fallback from {}.'
                    ''.format(each_name,
                              rdkit.Chem.MolToSmiles(docking_mol_rd),
                              str(each_mol)),
                    msg_verbosity=os_util.verbosity_level.debug,
                    current_verbosity=verbosity)
        else:
            os_util.local_print(
                'Could not read data in {} using rdkit. Please, check your file and convert it to a '
                'valid mol2. (You can also use "no_checks" to enable reading using pybel)'
                ''.format(str(each_mol)),
                msg_verbosity=os_util.verbosity_level.error,
                current_verbosity=verbosity)
            raise SystemExit(-1)

    return docking_mol_local
Exemple #49
0
 def __init__(self):
     """Load the structure stored in self.Filename.

     self.Mol is the first molecule of the xyz file as read by pybel;
     self.AwesomeMol is the result of ReadCoordzAwesome for the same file.
     self.Filename has to be available before this constructor runs.
     """
     # next() instead of the Python-2-only generator method .next()
     self.Mol = next(pybel.readfile("xyz", self.Filename))
     self.AwesomeMol = ReadCoordzAwesome(self.Filename)
Exemple #50
0
def extract_features(miss_container,
                     id,
                     molcode=None,
                     db_path=Global_var.DB_GENERAL_PATH.value,
                     relative_central_mass=np.zeros((3, ))):
    '''Extract coordinates and features of the heavy atoms of a molecule.

    The molecule is read from "<db_path>/<id>/<id>.mol2" when molcode is -1
    and from "<db_path>/<id>/<id>_ligand.mol2" otherwise.

    param miss_container: passed through to get_probs (molcode == -1 only)
    param id: the name of the complex code
    param molcode: pointing whether it is a protein (-1) or ligand (1)
    param db_path: the path where the complex is located
    param relative_central_mass: the central of the ligand; for molcode -1,
        atoms rejected by are_coordinates_acceptable are skipped

    return coords, features, central_mass: central_mass is returned only
        when molcode == 1 (three values); otherwise two values are
        returned. If the file is missing, empty or unreadable, every
        returned value is an empty list. If no atom is usable or the
        feature assembly fails, coords and features are both np.array([0]).
    '''
    atom_codes_init()

    mol_id = id if molcode == -1 else f"{id}_ligand"

    path_to_molecule = os.path.abspath("{}/{}/{}.mol2".format(
        db_path, id, mol_id))

    # Ligand callers unpack three values, all others two; every early exit
    # has to respect that.
    excluded = ([], [], []) if molcode == 1 else ([], [])

    # Explicit check: an assert would be stripped when running with -O
    if not os.path.isfile(path_to_molecule):
        logger.info(f"{mol_id} was excluded")
        return excluded

    if is_file_empty(path_to_molecule):
        return excluded

    try:
        molecule = next(pybel.readfile('mol2', path_to_molecule))
    except Warning:
        logger.info(f"{mol_id} was excluded")
        return excluded

    # The central mass depends on the molecule only, so compute it once. It
    # is then also defined when the loop below visits no heavy atom.
    if molcode == 1:
        central_mass = central_mass_compute(molecule)

    coords = []
    features = []
    heavy_atoms = []

    charges_db = read_json(id, db_path=db_path)

    for i, atom in enumerate(molecule):
        # hydrogens are ignored
        if atom.atomicnum <= 1:
            continue

        atomic_features = [getattr(atom, prop) for prop in NAMED_PROPS]
        # sys.maxsize marks "no charge available"
        charge = sys.maxsize

        if molcode == 1:
            charge = atom.partialcharge
        elif molcode == -1:
            if not are_coordinates_acceptable(
                    atom.coords, relative_central_mass, exclude_radius=10):
                continue
            charge = get_probs(miss_container, id, atom, charges_db)

        # if the charge is still the marker, it means that the particular
        # charge does not exist in our json file
        if charge == sys.maxsize:
            continue

        atomic_features.append(charge)
        heavy_atoms.append(i)
        coords.append(atom.coords)

        features.append(
            np.concatenate((encode_num(atom.atomicnum), atomic_features)))

    coords = np.array(coords, dtype=np.float64)
    features = np.array(features, dtype=np.float64)

    try:
        if features.shape[0] == 0 or coords.shape[0] == 0:
            raise ValueError("no usable atoms in {}".format(mol_id))
        # append the molcode column and the SMARTS features of the kept atoms
        features = np.hstack((features, molcode * np.ones((len(features), 1))))
        features = np.hstack([features, find_smarts(molecule)[heavy_atoms]])
    except Exception:
        # Best effort as before: any failure here yields the placeholder
        # arrays instead of an exception.
        coords = np.array([0])
        features = np.array([0])

    if molcode == 1:
        return coords, features, central_mass
    else:
        return coords, features
Exemple #51
0
import pybel
import chemml
from sklearn.externals import joblib

chemml.max_atoms = 30

data = chemml.pd.read_csv("test_pe.csv")  #load test data

C = []

mols = list(dict.fromkeys(data['molecule_name']))  #get unique molecule names

for mol_name in mols:
    # pybel.readfile raises IOError for a missing file and next() raises
    # StopIteration for a file without a molecule; neither returns None, so
    # both are mapped to None to reach the skip branch below.
    try:
        m = next(pybel.readfile("xyz", "structures/" + mol_name + ".xyz"))
    except (IOError, StopIteration):
        m = None

    if m is None:  #check if the structure is loaded
        data = data[
            data.molecule_name !=
            mol_name]  #if there is mismatch, delete this entry from dataframe
        print("Error in loading molecule: " + str(mol_name) + ". Skipping...")
        continue

    C.append(chemml.CoulombMatrixEig(m))

kr = joblib.load('pe.model')  #load model

data['predicted-pe'] = kr.predict(C)

data.to_csv(r"predicted_pe.csv")
Exemple #52
0
import pybel
import binascii
# import openbabel
#  
# obConversion = openbabel.OBConversion()
# obConversion.SetInAndOutFormats("pdbqt", "mol2")
# obmol = openbabel.OBMol()
# obConversion.ReadFile(obmol, '/home/damjan/Documents/Docking/result data/results/ligands/ZINC00000226.pdbqt')   # Open Babel will uncompress automatically
# obmol.AddHydrogens()
 
# print mol.NumAtoms()
# print mol.NumBonds()

 
# print [method for method in dir(mol) if callable(getattr(mol, method))]
# Read the first molecule of the ligand file. next() replaces the
# Python-2-only generator method .next().
# NOTE(review): the same ligand is read from two different relative paths and
# the second read replaces the first; confirm which path is the intended one.
mol = next(pybel.readfile("pdbqt", 'workspace/DockingResultRepositoryAPI/main/temp/ligands/ZINC00000125.pdbqt'))
# mol = pybel.readfile("pdbqt", '/home/damjan/Documents/Docking/result data/results/ligands/ZINC00000226.pdbqt').next()
# mol = pybel.readfile("pdbqt", '/home/damjan/Documents/Docking/result data/results/ligands/ZINC00000384.pdbqt').next()
mol = next(pybel.readfile("pdbqt", '../main/temp/ligands/ZINC00000125.pdbqt'))
# if not mol.OBMol.HasHydrogensAdded():
mol.OBMol.AddHydrogens()

# Descriptor values of the molecule
descvalues = mol.calcdesc()
# In Python, the update method of a dictionary allows you
# to add the contents of one dictionary to another
# for key in descvalues.keys():
#     print key + ": "
#     print descvalues[key]

# for key in pybel.outformats.keys():
#     print key + ": " + pybel.outformats[key]
Exemple #53
0
import shelve
import os
import pybel
import cPickle
import pandas as pd
import shutil
df = pd.DataFrame

from clustering import DSET_PATH

# SECURITY: unpickling can execute arbitrary code; DSET_PATH must point to a
# trusted file. Pickles are binary data, so the file is opened in 'rb' mode.
with open(DSET_PATH, 'rb') as f:
    dset = cPickle.load(f)

# Heavy-atom count of every approved drug, keyed by its DrugBank id
drugs = list(pybel.readfile("sdf", "../dat/approved.txt"))
ids, sizes = [], []
for drug in drugs:
    drug_id = drug.data['DRUGBANK_ID']
    # remove the hydrogens in place so that only heavy atoms are counted
    drug.removeh()
    sz = len(drug.atoms)
    sizes.append(sz)
    ids.append(drug_id)

# Two-column frame (one row per drug), merged into dset on the columns the
# two frames have in common
sz_dset = df([ids, sizes]).T
sz_dset.columns = ['DRUGBANK_ID', 'HeavyAtomNum']
dset = dset.merge(sz_dset)

filter_dset = {
    'DRUGBANK_ID': [],
    'ProteinBoundLig': [],
    'HeavyAtomNum': [],
    'LigSize': [],
    'LigPath': [],
Exemple #54
0
        return dr

# Demo run: IRC calculation with MOPAC (PM3) starting from ts2.xyz
if __name__ == '__main__':
    import os
    import numpy as np
    import numpy.linalg as la
    from ase.calculators import mopac
    from ase import Atoms
    import pybel

    os.chdir('../test')

    # The MOPAC directory inside the test directory is used for both
    # environment variables.
    MOPAC = os.path.join(os.getcwd(), 'MOPAC')
    for env_name in ('MOPAC_LICENSE', 'LD_LIBRARY_PATH'):
        os.environ[env_name] = MOPAC

    mopac_calc = mopac.MOPAC()
    mopac_calc.command = 'MOPAC/MOPAC2016.exe PREFIX.mop 2> /dev/null'
    mopac_calc.set(method='pm3')

    # Build the ASE structure from the first molecule of the xyz file
    mol = next(pybel.readfile('xyz', 'ts2.xyz'))
    atomic_numbers = [a.atomicnum for a in mol.atoms]
    cartesian = [a.coords for a in mol.atoms]
    atoms = Atoms(numbers=atomic_numbers, positions=cartesian)

    # Move the center of mass to the origin
    centered = atoms.positions - atoms.get_center_of_mass()
    atoms.set_positions(centered)
    atoms.set_calculator(mopac_calc)

    irccalc = IRC(atoms, stride=0.15, mw=True, forward=True, trajectory='ts1.traj')
    # run() is iterated to completion; the yielded values are not used
    for _ in irccalc.run():
        pass

                chargestate, mz, shortinchi, inchi, inputrecord[0],
                inputrecord[1])
        return [0, outstring]
    except:
        return [-1, inputrecord[0], inputrecord[1]]


cc = 0
# Output files: results, errors and names
fout = open(outfile, 'w')
ferr = open(outerror, 'w')
fnames = open(outnames, 'w')

dblist = []
totcount = 0
for totcount, mol in enumerate(pybel.readfile('sdf', infile), 1):
    mol.addh()
    # SMILES output with newlines removed and tabs turned into spaces; the
    # text before the first space is the SMILES string
    smiles_line = mol.write('smi')
    smiles_line = smiles_line.replace('\n', '').replace('\t', ' ')
    smi = smiles_line.split(' ')[0]

    # numeric part of the database id
    idx = int(mol.data['DATABASE_ID'].replace('YMDB', ''))
    names = mol.data['GENERIC_NAME'] if 'GENERIC_NAME' in mol.data else 'N/A'

    print(smi, idx, names)
    #print(names);
    #print(idx);
    #> <>
Exemple #56
0
def get_sdf_molecules(sdf_file, rmsd):
    """Return the molecules of an sdf file and a derived sdf file name.

    The name is the path of sdf_file without its extension, followed by
    '_r', the rmsd value and '.sdf'.
    """
    reader = pybel.readfile('sdf', sdf_file)
    stem = os.path.splitext(sdf_file)[0]
    sdf_name = stem + '_r' + str(rmsd) + '.sdf'
    molecules = list(reader)
    return molecules, sdf_name
Exemple #57
0
def al_exp_ins(org,
               ec,
               k,
               exp,
               neg=None,
               beta=1.0,
               kernel='rbf',
               degree=3,
               gamma=0.005,
               iterations=100,
               batch=1,
               C=1.0,
               initial=2,
               decf=False,
               random_seed=None,
               fp='FP4',
               simfp=fptr.integer_sim):
    """Assemble a labelled fingerprint data set and run ``routines.dw_exp_ins``.

    The data set is built from ``bi(org, ec)``, extended with negatives and
    with the molecules of an extra SDF file, then turned into a feature
    matrix ``x`` and a structured label array ``y`` (fields ``label`` and
    ``smiles``).

    :param org: first argument of ``bi``; also part of the output name.
    :param ec: second argument of ``bi``; also part of the output name.
    :param k: key selecting the entries of ``a.pos`` / ``a.neg`` to use.
    :param exp: SDF file name, resolved relative to ``CHEMPATH``. Every record
        must carry an integer-convertible ``label`` data field.
    :param neg: optional SDF passed to ``add_from_sdf(..., pos=False)``; when
        ``None``, ``random_negatives(k)`` is called instead.
    :param fp: ``'FP4'`` uses the second element of each stored tuple as the
        feature row; ``'FP2'`` rebuilds the row from the SMILES with
        ``fptr.reconstruct_fp``.
    :param beta, kernel, degree, gamma, iterations, batch, C, initial, decf,
        random_seed, simfp: forwarded unchanged to ``routines.dw_exp_ins``
        (``random_seed`` as ``seed``); ``org``, ``ec``, ``beta``, ``batch``,
        ``kernel`` and ``random_seed`` also appear in the output name.
    :raises IOError: if ``fp`` is neither ``'FP2'`` nor ``'FP4'``.
    :return: ``None``; the result of ``routines.dw_exp_ins`` is not returned.
    """
    a = bi(org, ec)
    if neg is not None:
        a.add_from_sdf(neg, k, pos=False)
    else:
        a.random_negatives(k)

    # Add the molecules of the extra SDF and remember their SMILES; the list
    # is handed to dw_exp_ins as ``excl``.
    excl = []
    for mol in pybel.readfile('sdf', os.path.join(CHEMPATH, exp)):
        smi = mol.write('can').strip()
        cls = int(mol.data['label'])
        a.add_from_smiles(smi, k, cls)
        excl.append(smi)

    # Positives first, then negatives - the same order as the rows of ``x``.
    smiles_access = [t[0] for t in a.pos[k]] + [t[0] for t in a.neg[k]]
    # Longest SMILES length fixes the width of the byte-string field of ``y``.
    n = max(len(str(s)) for s in smiles_access)

    if fp == 'FP4':
        pos_rows = [t[1] for t in a.pos[k]]
        neg_rows = [t[1] for t in a.neg[k]]
    elif fp == 'FP2':
        pos_rows = [
            np.array(fptr.reconstruct_fp(t[0], fptype='FP2'))
            for t in a.pos[k]
        ]
        neg_rows = [
            np.array(fptr.reconstruct_fp(t[0], fptype='FP2'))
            for t in a.neg[k]
        ]
    else:
        raise IOError("Valid values for fp are FP2 and FP4.")

    x_pos_array = np.vstack(tuple(pos_rows))
    x_neg_array = np.vstack(tuple(neg_rows))

    # +1 for every positive row, -1 for every negative row.
    y_obj = [1] * x_pos_array.shape[0] + [-1] * x_neg_array.shape[0]

    x = np.vstack((x_pos_array, x_neg_array))
    # zip() is a lazy iterator on Python 3; without list() numpy would wrap
    # the iterator object itself instead of building one record per pair.
    y = np.array(list(zip(y_obj, smiles_access)),
                 dtype=[('label', 'i4'), ('smiles', '|S%s' % str(n))])

    outfile = "al_expins_%s_%s_beta%s_batch%s_%s_rseed%s" % (org, ec, str(
        beta).replace('.', ''), str(batch), kernel, str(random_seed))

    out = routines.dw_exp_ins(x,
                              y,
                              outfile,
                              smiles_access,
                              excl,
                              C=C,
                              gamma=gamma,
                              iterations=iterations,
                              batch=batch,
                              degree=degree,
                              kernel=kernel,
                              beta=beta,
                              decf=decf,
                              seed=random_seed,
                              simfp=simfp,
                              initial=initial)
Exemple #58
0
def find_PiPi(pdb_file,
              lig_name,
              centroid_distance=5.0,
              dih_parallel=25,
              dih_tshape=80,
              verbose=1):
    """
    Count aromatic ring pairs (one ring from the ligand residue, one from the
    rest of the structure) that satisfy the Pi-Pi criteria below.

    A pair is counted when the ring-centre distance is below
    ``centroid_distance`` and the angle between the ring normals is either
    below ``dih_parallel`` or above ``dih_tshape``.

    :param pdb_file: path of the target file in PDB format.
    :param lig_name: ligand residue name; the first matching residue is used.
    :param centroid_distance: Max ring centroid distance
    :param dih_parallel: Max dihedral (parallel)
    :param dih_tshape: Min dihedral (T-shaped)
    :param verbose: when truthy, progress and every matching pair are printed.
    :return: number of Pi-Pi interactions found, or -1 when no residue named
        ``lig_name`` exists.
    """
    mol = next(pybel.readfile('pdb', pdb_file))
    if verbose:
        print("A total of %s residues" % mol.OBMol.NumResidues())

    # Locate the ligand: the first residue carrying the requested name.
    lig = None
    for res in ob.OBResidueIter(mol.OBMol):
        if res.GetName() == lig_name:
            lig = res
            if verbose:
                print("Ligand residue name is:", lig.GetName())
            break
    if not lig:
        if verbose:
            print("No ligand residue %s found, please confirm." % lig_name)
        return -1

    lig_atoms = list(ob.OBResidueAtomIter(lig))

    # Number the rings so they can be told apart in later passes.
    for ring_no, ring in enumerate(mol.sssr):
        ring.ring_id = ring_no

    # A ring belongs to the ligand as soon as one ligand atom is a member.
    lig_ring_ids = []
    lig_aromatic = []
    for ring in mol.sssr:
        if not any(ring.IsMember(atom) for atom in lig_atoms):
            continue
        lig_ring_ids.append(ring.ring_id)
        aromatic = ring.IsAromatic()
        if verbose:
            print("ligand ring_ID: ", ring.ring_id, end=' ')
            print("aromatic" if aromatic else "saturated")
        if aromatic:
            lig_aromatic.append(ring)

    # Every remaining ring is treated as a receptor ring.
    rec_rings = [ring for ring in mol.sssr
                 if ring.ring_id not in lig_ring_ids]
    rec_aromatic = [ring for ring in rec_rings if ring.IsAromatic()]
    if verbose:
        print("\nReceptor has ", len(rec_rings), " rings,", end=' ')
        print(" has ", len(rec_aromatic), " aromatic rings.")

    # Output vectors filled in place by findCenterAndNormal.
    lig_center, rec_center = ob.vector3(), ob.vector3()
    lig_normal, lig_normal_alt = ob.vector3(), ob.vector3()
    rec_normal, rec_normal_alt = ob.vector3(), ob.vector3()

    count = 0
    for lig_ring in lig_aromatic:
        lig_ring.findCenterAndNormal(lig_center, lig_normal, lig_normal_alt)
        for rec_ring in rec_aromatic:
            rec_ring.findCenterAndNormal(rec_center, rec_normal,
                                         rec_normal_alt)
            dist = lig_center.distSq(rec_center)**0.5
            angle = vecAngle(lig_normal, rec_normal)
            if not dist < centroid_distance:
                continue
            if not (angle < dih_parallel or angle > dih_tshape):
                continue
            count += 1
            if verbose:
                print(
                    "Pi-Pi ring pairs: %3s,%3s  Angle(deg.): %5.2f  Distance(A): %.2f"
                    % (rec_ring.ring_id, lig_ring.ring_id, angle, dist))

    if verbose:
        print("Total Pi-Pi interactions:", count)
    return count
Exemple #59
0
                    nargs='+',
                    help='Types files to process')
# --filter: float threshold, 100.0 unless given on the command line.
parser.add_argument('--filter',
                    type=float,
                    default=100.0,
                    help='Filter out examples greater than the specified '
                    'value')
# --suffix: text inserted before the extension of each new types file.
parser.add_argument('--suffix',
                    type=str,
                    default='_wc',
                    help='Suffix for new types files')
args = parser.parse_args()

# Maps each .gninatypes path to the distance between the ligand centre and
# the centre of the coordinates stored in that file.
centerinfo = dict()
# First process every ligand SDF found one directory level below the current
# directory, together with the .gninatypes files sitting next to it.
for ligfile in glob.glob('*/*_ligand.sdf'):
    # Only the first molecule of the ligand file is used.
    mol = next(pybel.readfile('sdf', ligfile))
    # Unweighted mean of the atom coordinates.
    center = np.mean([a.coords for a in mol.atoms], axis=0)
    ligdir = ligfile.split('/')[0]
    for gtypes in glob.glob('%s/*.gninatypes' % ligdir):
        # Close the handle right away instead of leaking it.
        with open(gtypes, 'rb') as gfile:
            buf = gfile.read()
        # The buffer is read as 4-byte floats grouped in rows of four. Floor
        # division keeps both counts integral on Python 3, where ``/`` would
        # produce floats and break the format string and the reshape.
        n = len(buf) // 4
        vals = np.array(struct.unpack('%df' % n, buf)).reshape(n // 4, 4)
        # Only the first three columns enter the centre; the fourth is ignored.
        lcenter = np.mean(vals, axis=0)[0:3]
        dist = np.linalg.norm(center - lcenter)
        centerinfo[gtypes] = dist

# For every types file given on the command line, open an output file whose
# name has args.suffix inserted before the extension.
# NOTE(review): the handle is not closed in the visible part of the loop -
# confirm that the remainder of the loop body closes it.
for tfile in args.typefiles:
    fname, ext = os.path.splitext(tfile)
    outname = fname + args.suffix + ext
    out = open(outname, 'w')
Exemple #60
0
def to_OBMol(file_list):
    """Return the first molecule of each MOPAC output file as an OBMol.

    :param file_list: iterable of paths readable with pybel's ``mopout``
        format.
    :return: list holding, for every path and in the same order, the
        ``OBMol`` of the first molecule read from it.
    :raises StopIteration: if a file yields no molecule.
    """
    # The builtin next() works on Python 2.6+ and Python 3; the iterator
    # method ``.next()`` exists on Python 2 only.
    return [
        next(pybel.readfile("mopout", filename)).OBMol
        for filename in file_list
    ]