def compute_properties(args):
    """Compute extended properties for every molecule and write them out.

    Molecules are read from ``args.input`` (format ``args.iformat``). Only
    molecules with more than five heavy atoms are processed. For
    ``args.oformat == "sdf"`` the properties are attached to the molecule's
    data block and the molecule is written with pybel; for any other output
    format one tab-separated line of property values is written per molecule.

    :param args: namespace providing ``iformat``, ``input``, ``oformat``,
        ``output`` and ``header``.
    """
    to_sdf = args.oformat == "sdf"
    if to_sdf:
        outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True)
    else:
        outfile = open(args.output, "w")

    # try/finally so the output handle is flushed and closed even when
    # reading or property calculation fails part-way through.
    try:
        if args.header:
            # The column names are derived from the first molecule only.
            # NOTE(review): with oformat == "sdf" this passes a string to a
            # pybel.Outputfile - confirm header is never combined with sdf.
            mol = next(pybel.readfile(args.iformat, args.input))
            metadata = cheminfolib.get_properties_ext(mol)
            outfile.write("%s\n" % "\t".join(
                [cheminfolib.ColumnNames[key] for key in metadata]))

        for mol in pybel.readfile(args.iformat, args.input):
            if mol.OBMol.NumHvyAtoms() > 5:
                metadata = cheminfolib.get_properties_ext(mol)
                if to_sdf:
                    mol.data.update({
                        cheminfolib.ColumnNames[key]: metadata[key]
                        for key in metadata
                    })
                    outfile.write(mol)
                else:
                    outfile.write("%s\n" % (
                        "\t".join([str(metadata[key]) for key in metadata])))
    finally:
        outfile.close()
def pairwise_atomic_types(path, processed_dict, atom_types, atom_types_):
    """Count ligand/pocket atom-type pairs for every complex under *path*.

    Only directory entries whose name has exactly four characters are
    processed. For each, the ligand (mol2) and protein (pdb) are read, the
    atom pairs selected by ``dist_filter(dm, 12)`` are tallied by
    (pocket atomic number, ligand atomic number), and the counts are stored
    in ``processed_dict[name]['type_pair']``.

    :param path: directory containing one sub-directory per complex.
    :param processed_dict: dict keyed by complex name; updated in place.
    :param atom_types: atomic numbers accepted for ligand atoms.
    :param atom_types_: atomic numbers accepted for pocket atoms.
    :return: the same ``processed_dict`` object.
    """
    pair_keys = [(poc_t, lig_t) for poc_t in atom_types_ for lig_t in atom_types]

    for name in tqdm(os.listdir(path)):
        if len(name) == 4:
            ligand = next(
                pybel.readfile('mol2',
                               '%s/%s/%s_ligand.mol2' % (path, name, name)))
            pocket = next(
                pybel.readfile('pdb',
                               '%s/%s/%s_protein.pdb' % (path, name, name)))

            lig_xyz = np.vstack([a.coords for a in ligand])
            poc_xyz = np.vstack([a.coords for a in pocket])
            lig_numbers = [a.atomicnum for a in ligand]
            poc_numbers = [a.atomicnum for a in pocket]

            distances = distance_matrix(lig_xyz, poc_xyz)
            lig_idx, poc_idx = dist_filter(distances, 12)

            counts = dict.fromkeys(pair_keys, 0)
            for li, pi in zip(lig_idx, poc_idx):
                lig_z = lig_numbers[li]
                poc_z = poc_numbers[pi]
                # Only pairs where both elements are of a tracked type count.
                if lig_z in atom_types and poc_z in atom_types_:
                    counts[(poc_z, lig_z)] += 1

            processed_dict[name]['type_pair'] = list(counts.values())

    return processed_dict
def LogRead(self):
    """Read the information in each .log file and save it to a file.

    For every entry in ``self.smiles`` the Gaussian log
    ``{self.path}/log/opt_molecule_{n}.log`` is converted to
    ``xyz/data_{n}.xyz`` (each molecule in the log overwrites the previous
    one, so the last one is what remains). The file
    ``log/{self.name}_molecule_{n}.log`` is then read and its lines printed;
    when ``self.name`` is ``'sp'`` the first line is printed again.
    """
    # https://docs.python.org/3.1/tutorial/datastructures.html
    for n in range(len(self.smiles)):
        try:
            os.mkdir(self.path + "/xyz")
        except OSError:
            # Usually "already exists". Other failures were ignored by the
            # original code as well; the write below surfaces real problems.
            pass

        log_path = '{path}/log/opt_molecule_{n}.log'.format(path=self.path, n=n)
        xyz_path = 'xyz/data_{n}.xyz'.format(n=n)
        for molecule in pybel.readfile('g09', log_path):
            output = pybel.Outputfile('xyz', xyz_path, overwrite=True)
            try:
                output.write(molecule)
            finally:
                # Close explicitly so the data is flushed to disk.
                output.close()

        with open('log/{name}_molecule_{n}.log'.format(name=self.name, n=n),
                  'r') as file:
            lines = file.readlines()
        print(lines)

        if str(self.name) == 'sp':
            # NOTE(review): the original filter was always true, so this is
            # simply the first line of the log - confirm the intended
            # "energy" extraction.
            energy = next(iter(lines))
            print(energy)
def _select_mopac_out_path(directory):
    """Return the MOPAC ``.out`` file to read from *directory*, or None."""
    candidates = GetFileInDirFromExt(directory, '.out')
    # Check for "nothing found" before indexing or delegating, so an empty
    # result never reaches GetLastestCreatedFile.
    if not len(candidates):
        return None
    if len(candidates) == 1:
        chosen = candidates[0]
    else:
        chosen = GetLastestCreatedFile(filepaths=candidates)
    if not len(chosen):
        return None
    return chosen


def GetOptimizedMol(
        arc_file: str = None,
        inputmol: Chem.Mol = None,
        method: str = 'PM3',
        version: str = '7.1',
        verbose: bool = False,
        dispose: bool = True,
) -> Union[pybel.Molecule, None]:
    """Optimize molecule geometry with MOPAC and return the optimized molecule.

    In order not to optimize a molecule multiple times, an ARC file may be
    provided; the MOPAC output file is then looked up in the directory of
    that ARC file. Otherwise the molecule given as ``inputmol`` is optimized.

    :param arc_file: Path to MOPAC .arc file; takes precedence over inputmol.
    :param inputmol: molecule to optimize (ignored if arc_file provided).
    :param method: semi-empirical method to apply (ignored if arc_file provided).
    :param version: version of MOPAC to be used (ignored if arc_file provided).
    :param verbose: whether to print progress messages (ignored if arc_file provided).
    :param dispose: whether to remove generated MOPAC output files (ignored if arc_file provided).
    :return: the optimized molecule on success, None otherwise.
        NOTE(review): the arc_file path returns the result of
        ``Chem.MolFromMol2Block`` while the inputmol path returns the pybel
        molecule - confirm which type callers expect.
    :raises ValueError: if neither arc_file nor inputmol is given.
    """
    if arc_file is None and inputmol is None:
        raise ValueError('Either ARC file or inputmolecule must be provided.')

    if arc_file is not None:
        mopac_out_path = _select_mopac_out_path(os.path.dirname(arc_file))
        if mopac_out_path is None:
            return None
        pybelmol = next(pybel.readfile('mopout', mopac_out_path))
        return Chem.MolFromMol2Block(pybelmol.write(format='mol2'))

    # The trailing False is passed positionally as in the original call;
    # presumably it defers disposal so the output can be read first.
    res = GetARCFile(inputmol, method, version, verbose, False)
    if res is None:
        return None
    dir_, arc_file_ = res
    mopac_out_path = _select_mopac_out_path(dir_)
    if mopac_out_path is None:
        return None
    pybelmol = next(pybel.readfile('mopout', mopac_out_path))
    if dispose:
        Dispose(dir_)
    return pybelmol
def moved(output, reference, refformat):
    """Tell whether any atom in *output* is displaced from *reference*.

    The first molecule of the PDB file *output* is compared, atom by atom in
    file order, with the first molecule of *reference* (read as
    *refformat*). Returns True as soon as a pair of atoms is more than 1e-3
    apart; otherwise prints *output* and returns False.
    """
    produced = next(pybel.readfile('pdb', output))
    expected = next(pybel.readfile(refformat, reference))

    displaced = any(
        np.linalg.norm(np.array(atom_a.coords) - np.array(atom_b.coords)) > 1e-3
        for atom_a, atom_b in zip(produced.atoms, expected.atoms))
    if displaced:
        return True

    print(output)
    return False
def eliminar_repetits(sdf_file):
    """Write the molecules of *sdf_file* without duplicates.

    Molecules are keyed by their InChI string; when several molecules share
    an InChI the last one read is kept. The result is written to
    ``<sdf_file minus its last four characters>_uniques.sdf``, overwriting
    any existing file.

    :param sdf_file: path to the input SD file.
    """
    unique_mols = {
        mol.write("inchi"): mol
        for mol in pybel.readfile("sdf", sdf_file)
    }
    outputsdf = pybel.Outputfile("sdf",
                                 str(sdf_file[:-4]) + "_uniques.sdf",
                                 overwrite=True)
    try:
        # .values() works on both Python 2 and 3; .itervalues() is
        # Python 2 only.
        for mol in unique_mols.values():
            outputsdf.write(mol)
    finally:
        outputsdf.close()
def create_pdbqt_from_pdb_file(pdb_filepath, pdbqt_filepath, pH=7.4):
    """
    Convert a PDB file to a PDBQT file.

    The first molecule of the PDB file is read, passed to
    ``optimize_structure_for_docking`` with the given pH, and written out in
    PDBQT format (overwriting an existing file).

    Parameters
    ----------
    pdb_filepath: str or pathlib.Path
        Path to input PDB file; its suffix is replaced by ``.pdb``.
    pdbqt_filepath: str or pathlib.Path
        Path to output PDBQT file; its suffix is replaced by ``.pdbqt``.
    pH: float
        pH value forwarded as ``protonate_for_pH``.

    Returns
    -------
    None
    """
    source = str(Path(pdb_filepath).with_suffix(".pdb"))
    # readfile() yields the molecules in the file; only the first is used.
    reader = pybel.readfile("pdb", source)
    molecule = next(reader)

    optimize_structure_for_docking(molecule, protonate_for_pH=pH)

    target = str(Path(pdbqt_filepath).with_suffix(".pdbqt"))
    molecule.write("pdbqt", target, overwrite=True)
    return None
def xyz_to_pyMol(xyz, cluster_bond_path=None):
    """Read an XYZ file and optionally rebuild it with extra bonds.

    :param xyz: path to the XYZ file; only its first molecule is used.
    :param cluster_bond_path: optional path to a text file holding a Python
        literal list of ``(begin_idx, end_idx, order)`` bonds to add on top
        of the bonds Open Babel perceived.
    :return: the molecule as read when no bond file is given, otherwise a
        new ``pybel.Molecule`` with the same atoms and the combined bonds.
    """
    mol = next(pybel.readfile('xyz', xyz))
    if not cluster_bond_path:
        return mol

    m = pybel.ob.OBMol()
    m.BeginModify()
    for atom in mol:
        obatom = pybel.ob.OBAtom()
        # Kept from the original: drops Python-side ownership of the atom.
        obatom.thisown = 0
        obatom.SetAtomicNum(atom.atomicnum)
        obatom.SetVector(*atom.coords)
        m.AddAtom(obatom)
        del obatom

    with open(cluster_bond_path, 'r') as f:
        lines = f.read()
    # SECURITY: eval() executes whatever the file contains. Only use bond
    # files from trusted sources; if they are always plain literals,
    # ast.literal_eval would be the safe replacement.
    cluster_bond = eval(lines)

    bonds = [(bond.GetBeginAtomIdx(), bond.GetEndAtomIdx(),
              bond.GetBondOrder())
             for bond in pybel.ob.OBMolBondIter(mol.OBMol)]
    bonds.extend(cluster_bond)
    for bond in bonds:
        m.AddBond(bond[0], bond[1], bond[2])
    # Pair BeginModify() with EndModify() so the edits are committed.
    m.EndModify()

    return pybel.Molecule(m)
def __main__():
    """
    Select compounds with certain properties from a small library.

    Returns True when the precalculated-property filter was used, and None
    otherwise.
    """
    args = parse_command_line()

    if args.filters == "__filter_by_name__":
        filter_by_name(args)
        return

    # The filter string comes from the XML wrapper padded with whitespace,
    # and its final loop leaves a trailing ',}' that is not valid JSON.
    # Both are cleaned up before parsing.
    cleaned = (args.filters).replace(" ", "").replace(",}", "}")
    filters = json.loads(cleaned)

    if args.iformat == "sdf":
        # Look at the first molecule only: if it carries every required
        # property, assume the rest of the file does too and filter on the
        # stored values instead of recalculating them.
        first = next(pybel.readfile("sdf", args.input))
        has_all = all(
            cheminfolib.ColumnNames.get(key, key) in first.data
            for key, _ in filters.items())
        if has_all:
            filter_precalculated_compounds(args, filters)
            return True

    filter_new_compounds(args, filters)
def getProperties(mol):
    """Return per-atom Open Babel properties for *mol*.

    *mol* is written to a temporary PDB file (via ``mol.write``), re-read
    with pybel, and the temporary file is removed afterwards.

    :param mol: object with a ``write(filename)`` method producing a PDB file.
    :return: list of ``[residue name, residue number, atom id, atom type,
        partial charge rounded to 3 decimals]``, one entry per atom.
    :raises ImportError: if openbabel is not installed.
    """
    try:
        from openbabel import pybel
    except ImportError:
        raise ImportError(
            'Could not import openbabel. The atomtyper requires this dependency so please install it with `conda install openbabel -c acellera`'
        )

    # delete=False keeps the file reserved under our name after the handle
    # is closed; taking .name from a discarded NamedTemporaryFile leaves a
    # window where another process can claim the same path.
    with NamedTemporaryFile(suffix='.pdb', delete=False) as tmp:
        name = tmp.name
    try:
        mol.write(name)
        mpybel = next(pybel.readfile('pdb', name))
        residues = pybel.ob.OBResidueIter(mpybel.OBMol)
        atoms = [[
            r.GetName(),
            r.GetNum(),
            r.GetAtomID(at),
            at.GetType(),
            round(at.GetPartialCharge(), 3)
        ] for r in residues for at in pybel.ob.OBResidueAtomIter(r)]
    finally:
        # Always clean up, including when writing or parsing fails.
        os.remove(name)
    return atoms
def loadall(fname: str):
    """
    Read every molecule contained in a file.

    Parameters
    ----------
    fname: str
        File name; the format is determined by ``utils.molformat``.

    Returns
    -------
    List of molecules. For PDB files that yield more than one molecule the
    last one is dropped.
    """
    fmt = utils.molformat(fname)
    obmols = list(pybel.readfile(fmt, fname))

    # FIXME: Special handling for multi-model PDB files
    # See OpenBabel Issue #2097
    if fmt == "pdb" and len(obmols) > 1:
        obmols.pop()

    return obmols
def getLigandPrints(flist):
    '''Get ECFP4 fingerprints and names for the ligands in *flist*.

    Files ending in ``.smi``/``.ism`` are read line by line as
    ``SMILES [name]``; any other file is handed to pybel using its extension
    as the format, and the molecule title is used as the name.

    :param flist: iterable of file names.
    :return: tuple ``(fingerprints, names)`` of two lists of equal length.
    '''
    fingerprints = []
    names = []
    for fname in flist:
        ext = os.path.splitext(fname)[1].split('.')[-1]
        if ext == 'smi' or ext == 'ism':
            with open(fname, 'r') as f:
                for line in f:
                    contents = line.split()
                    if not contents:
                        continue  # blank line: nothing to fingerprint
                    m = pybel.readstring('smi', contents[0])
                    fp = m.calcfp('ecfp4')
                    fingerprints.append(fp)
                    # Tolerate lines that carry a SMILES but no name.
                    names.append(contents[1] if len(contents) > 1 else '')
        else:
            try:
                for m in pybel.readfile(ext, fname):
                    fp = m.calcfp('ecfp4')
                    fingerprints.append(fp)
                    # Keep names aligned with fingerprints for these files.
                    names.append(m.title)
            except Exception as e:
                # Best effort: report unreadable files and carry on.
                print(e)
    return (fingerprints, names)
def fitmol(fname, niters=10):
    """Fit atoms to the density grid generated from the molecule in *fname*.

    The first molecule of the SD file is centred, hydrogens are added, and
    it is gridded with the module-level ``gmaker``/``typer`` before being
    passed to ``simple_atom_fit``.

    :param fname: path to an SD file.
    :param niters: forwarded to ``simple_atom_fit``.
    :return: tuple ``(struct, fittime, loss, fixes, rmsd)``; ``rmsd`` is
        ``np.inf`` when the RMSD computation raises.
    """
    print('Reading {}'.format(fname))
    m = next(pybel.readfile('sdf', fname))
    m.OBMol.Center()  # put in center of box!
    m.addh()

    print('Typing input molecule')
    cset = molgrid.CoordinateSet(m, typer)

    print('Creating empty grid')
    mgrid_values = torch.zeros(gmaker.grid_dimensions(cset.num_types()),
                               dtype=torch.float32,
                               device=device)

    print('Calling gmaker forward')
    gmaker.forward((0, 0, 0), cset, mgrid_values)

    mgrid = generate.MolGrid(mgrid_values, channels, np.zeros(3), 0.5)
    types = generate.count_types(cset.type_index.tonumpy().astype(int),
                                 cset.num_types(),
                                 dtype=np.int16)

    grid = simple_atom_fit(mgrid, types, niters)
    struct = grid.info['src_struct']
    loss = struct.info['loss']
    fittime = struct.info['time']
    fixes = struct.info['n_steps']

    try:
        rmsd = get_min_rmsd(cset.coords, cset.type_index.tonumpy(),
                            struct.xyz, struct.c)
    except Exception:
        # A failed RMSD is reported as infinity, but KeyboardInterrupt and
        # SystemExit must still propagate.
        rmsd = np.inf

    return struct, fittime, loss, fixes, rmsd
def patch_scores_sdf(sdf_in, outfile, scores):
    """Attach scores to SDF records and write SDF and TSV results.

    Records of *sdf_in* are numbered from 0 in file order. A record whose
    number is a key of *scores* gets the score stored in its
    ``dls_deep_score`` data field, is written to ``<work_dir>/<outfile>.sdf``
    and produces one line ``index, SCORE, SCORE.norm, score`` in
    ``<work_dir>/<outfile>.tsv`` (missing SCORE fields become empty
    strings). Records without a score are logged and skipped.

    :param sdf_in: path to the input SD file.
    :param outfile: base name (no extension) of the two output files.
    :param scores: mapping from record index to score.
    """
    sdf_path = "{0}{1}{2}.sdf".format(work_dir, os.path.sep, outfile)
    tsv_path = "{0}{1}{2}.tsv".format(work_dir, os.path.sep, outfile)
    utils.log("Writing results to {0} and {1}".format(sdf_path, tsv_path))

    with open(tsv_path, 'w') as tsv_file:
        sdf_file = pybel.Outputfile("sdf", sdf_path)
        # try/finally so the SDF writer is flushed and closed on failure.
        try:
            for counter, mol in enumerate(pybel.readfile("sdf", sdf_in)):
                if counter in scores:
                    score = scores[counter]
                    mol.data['dls_deep_score'] = score
                    rdock_score = (mol.data['SCORE']
                                   if 'SCORE' in mol.data else '')
                    rdock_nscore = (mol.data['SCORE.norm']
                                    if 'SCORE.norm' in mol.data else '')
                    sdf_file.write(mol)
                    tsv_file.write("{0}\t{1}\t{2}\t{3}\n".format(
                        counter, rdock_score, rdock_nscore, score))
                else:
                    utils.log("No score found for record", counter)
        finally:
            sdf_file.close()
def filter_by_name(args):
    """Write the molecules whose title appears in a list of names.

    Molecules are read from the SD file ``args.input``; names are read one
    per line from ``args.list_of_names``. Titles and names are compared
    after stripping surrounding whitespace. A molecule is written once for
    every line of the name list that matches it.

    :param args: namespace providing ``input``, ``list_of_names``,
        ``oformat`` and ``output``.
    """
    from collections import Counter

    # Read the name list once and close it, not once per molecule.
    with open(args.list_of_names) as handle:
        wanted = Counter(line.strip() for line in handle)

    outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True)
    try:
        for mol in pybel.readfile("sdf", args.input):
            # Counter returns 0 for unknown titles, so nothing is written.
            for _ in range(wanted[mol.title.strip()]):
                outfile.write(mol)
    finally:
        outfile.close()
def readXYZ(xyz, bonds=None):
    """Build a ``gen3D.Molecule`` from an XYZ file with extra fixed bonds.

    The atoms of the first molecule in *xyz* are copied into a fresh OBMol.
    Its bonds are the ones Open Babel detected plus a fixed list of
    additional single bonds, supplied manually because automatic bond
    detection is often unreliable for metal systems.

    NOTE(review): the ``bonds`` argument is never read - its value is
    ignored. Confirm whether callers expect it to replace the fixed list.
    """
    # extract molecule information from xyz
    source = next(pb.readfile('xyz', xyz))

    holder = Molecule(pb.ob.OBMol())
    obmol = holder.OBMol
    obmol.BeginModify()

    for src_atom in source:
        xyz_triplet = list(src_atom.coords)
        element = src_atom.atomicnum
        new_atom = ob.OBAtom()
        new_atom.thisown = 0
        new_atom.SetAtomicNum(element)
        new_atom.SetVector(*xyz_triplet)
        obmol.AddAtom(new_atom)
        del new_atom

    detected = [(b.GetBeginAtomIdx(), b.GetEndAtomIdx(), b.GetBondOrder())
                for b in pb.ob.OBMolBondIter(source.OBMol)]
    manual = [(12, 14, 1), (12, 15, 1), (12, 16, 1), (12, 17, 1),
              (12, 13, 1), (17, 23, 1), (16, 23, 1)]
    for begin, end, order in detected + manual:
        obmol.AddBond(begin, end, order)

    obmol.EndModify()
    return gen3D.Molecule(obmol)
def getOpenBabelProperties(pdb, outfile):
    """Dump per-atom Open Babel properties of a PDB file as CSV lines.

    Each line holds atom index, residue name, residue number, atom id, atom
    type and partial charge (3 decimals). The process exits with status 1
    if openbabel cannot be imported, 2 if the PDB cannot be read, and 3 if
    writing *outfile* fails; tracebacks are printed for the latter two.
    """
    try:
        from openbabel import pybel
    except ImportError:
        print(
            "Could not import openbabel. The atomtyper requires this dependency so please install it with `conda install openbabel -c conda-forge`"
        )
        sys.exit(1)

    try:
        reader = pybel.readfile("pdb", pdb)
        mpybel = next(reader)
    except Exception:
        traceback.print_exc()
        sys.exit(2)

    try:
        with open(outfile, "w") as handle:
            # Lazy generator: rows are produced and written one at a time,
            # inside the try block.
            rows = (
                f"{at.GetIndex()},{r.GetName()},{r.GetNum()},{r.GetAtomID(at)},{at.GetType()},{at.GetPartialCharge():.3f}\n"
                for r in pybel.ob.OBResidueIter(mpybel.OBMol)
                for at in pybel.ob.OBResidueAtomIter(r))
            for row in rows:
                handle.write(row)
    except Exception:
        traceback.print_exc()
        sys.exit(3)
def __init__(self, path, name, calc):
    """Store the job settings and load the molecules from smiles.smi.

    :param path: directory that contains ``smiles.smi``.
    :param name: stored as ``self.name``.
    :param calc: stored as ``self.calc``.
    """
    self.path, self.name, self.calc = path, name, calc
    self.normal, self.error = [], []
    # All molecules are read eagerly so they can be indexed later.
    smiles_reader = pybel.readfile('smi', '{}/smiles.smi'.format(path))
    self.smiles = [molecule for molecule in smiles_reader]
    self.charge = []
def addh(args):
    """Re-add hydrogens to every molecule with more than five heavy atoms.

    Existing hydrogens are removed first and then added again with
    ``OBMol.AddHydrogens(args.polar, True, args.pH)``. Smaller molecules are
    dropped. Output is written in the input format (``args.iformat``).

    :param args: namespace providing ``iformat``, ``input``, ``output``,
        ``polar`` and ``pH``.
    """
    outfile = pybel.Outputfile(args.iformat, args.output, overwrite=True)
    # try/finally so the writer is flushed and closed if a molecule fails.
    try:
        for mol in pybel.readfile(args.iformat, args.input):
            if mol.OBMol.NumHvyAtoms() > 5:
                mol.removeh()
                mol.OBMol.AddHydrogens(args.polar, True, args.pH)
                outfile.write(mol)
    finally:
        outfile.close()
def default_open_input_sdf(filename):
    """Open the input as an SD file using pybel.

    :param filename: The name of the file.
    :return: tuple ``(None, reader)`` where ``reader`` is the object
        returned by ``pybel.readfile("sdf", filename)``.
    """
    return None, pybel.readfile("sdf", filename)
def pdb_to_pdbqt(pdb, save_path):
    """Convert a single-molecule PDB file to PDBQT.

    Hydrogens are added and the molecule is written to *save_path* with the
    ``r`` writer option, overwriting any existing file.

    :param pdb: path to a PDB file that must contain exactly one molecule.
    :param save_path: destination path.
    :return: *save_path*.
    """
    molecules = [mol for mol in pybel.readfile('pdb', pdb)]
    assert len(molecules) == 1
    molecule = molecules[0]
    molecule.addh()
    # opt:r = rigid - less errors?? - revisit this
    writer_options = {'r': True}
    molecule.write('pdbqt', save_path, opt=writer_options, overwrite=True)
    return save_path
def are_similar(xyz, sdf):
    """Check that every atom of the XYZ file has a close match in the SDF.

    For each atom of the first molecule in *xyz*, the first atom of the
    first molecule in *sdf* lying closer than 0.1 is taken as its match.
    Returns False as soon as an atom has no match, True otherwise.
    Assertions guard that the matching is a bijection.
    """
    first = next(pybel.readfile('xyz', xyz))
    second = next(pybel.readfile('sdf', sdf))

    # n^2 comparison; the asserts ensure the mapping is one-to-one
    matched = {}
    used = set()
    for atom_a in first.atoms:
        partner = next(
            (atom_b for atom_b in second.atoms
             if np.linalg.norm(
                 np.array(atom_a.coords) - np.array(atom_b.coords)) < 0.1),
            None)
        if partner is None:
            # nothing matched this atom
            return False
        assert atom_a.idx not in matched
        assert partner.idx not in used
        matched[atom_a.idx] = partner.idx
        used.add(partner.idx)
    return True
def remove_protonation(args):
    """Set the formal charge of every atom to zero and write the molecules.

    Input and output both use ``args.iformat``. A stored ``inchi`` data
    field is deleted so that the modified molecule is what gets saved.

    :param args: namespace providing ``iformat``, ``input`` and ``output``.
    """
    outfile = pybel.Outputfile(args.iformat, args.output, overwrite=True)
    # try/finally so the writer is flushed and closed if a molecule fails.
    try:
        for mol in pybel.readfile(args.iformat, args.input):
            for atom in mol.atoms:
                atom.OBAtom.SetFormalCharge(0)
            if 'inchi' in mol.data:
                # remove inchi cache so modified mol is saved
                del mol.data['inchi']
            outfile.write(mol)
    finally:
        outfile.close()
def remove_ions(args):
    """Strip salts from each molecule and write what remains.

    Molecules with five or fewer heavy atoms are skipped. The others have
    ``OBMol.StripSalts(0)`` applied and are written only if they still have
    more than five heavy atoms afterwards. Input and output both use
    ``args.iformat``.

    :param args: namespace providing ``iformat``, ``input`` and ``output``.
    """
    outfile = pybel.Outputfile(args.iformat, args.output, overwrite=True)
    # try/finally so the writer is flushed and closed if a molecule fails.
    try:
        for mol in pybel.readfile(args.iformat, args.input):
            if mol.OBMol.NumHvyAtoms() > 5:
                mol.OBMol.StripSalts(0)
                # Stripping may leave only a small fragment; drop those too.
                if mol.OBMol.NumHvyAtoms() > 5:
                    outfile.write(mol)
    finally:
        outfile.close()
def prepare_dataset(self,hdf_mode='w'):
    """Featurize every structure in ``self.data_dir`` into an HDF5 file.

    Each sub-directory is expected to hold ``protein.mol2`` and
    ``cavity6.mol2``. Coordinates are shifted by the protein centroid.
    Structure ids that differ only by a trailing ``_<digits>`` share one
    HDF5 group: the first creates the group, later ones append their pocket
    arrays, provided their centroid agrees within 0.5; otherwise they are
    reported and skipped.

    :param hdf_mode: mode used to open ``self.hdf_path``.
    """
    def write_array(target, key, values):
        # Same dataset settings for every array stored in the file.
        target.create_dataset(key, data=values, shape=values.shape,
                              dtype='float32', compression='lzf')

    root = self.data_dir
    structure_ids = os.listdir(root)
    pocket_counts = {}

    with h5py.File(self.hdf_path, mode=hdf_mode) as store:
        for structure_id in structure_ids:
            protein = next(pybel.readfile(
                'mol2', os.path.join(root, structure_id, 'protein.mol2')))
            pocket = next(pybel.readfile(
                'mol2', os.path.join(root, structure_id, 'cavity6.mol2')))

            pocket_coords, pocket_features = \
                self.featurizer.pocket_featurizer(pocket)
            prot_coords, prot_features = \
                self.featurizer.protein_featurizer(protein)

            centroid = prot_coords.mean(axis=0)
            pocket_coords -= centroid
            prot_coords -= centroid

            group_id = re.sub('_[0-9]+$', '', structure_id)

            if group_id not in store:
                group = store.create_group(group_id)
                write_array(group, 'coords', prot_coords)
                write_array(group, 'features', prot_features)
                write_array(group, 'pocket_coords', pocket_coords)
                write_array(group, 'pocket_features', pocket_features)
                write_array(group, 'centroid', centroid)
                continue

            group = store[group_id]
            if not np.allclose(centroid, group['centroid'][:], atol=0.5):
                print('Structures for %s are not aligned, ignoring pocket %s' % (group_id, structure_id))
                continue

            pocket_counts[group_id] = pocket_counts.get(group_id, 1) + 1
            for key, extra in (('pocket_coords', pocket_coords),
                               ('pocket_features', pocket_features)):
                # Replace the stored array by stored + new rows.
                merged = np.concatenate((group[key][:], extra))
                del group[key]
                write_array(group, key, merged)
def Inputs(self):
    '''Turn smiles.smi into 3D structures and save input and job files.

    For every entry in ``self.smiles``: if ``'opt'`` is one of the words of
    ``self.calc[2]``, a 3D geometry is built from the SMILES; otherwise the
    geometry is read from ``{self.path}/log/opt_molecule_{n}.log``. The
    geometry is written in xyz format to ``input/{name}_input_{n}.com``,
    which is then rewritten with the first six items of ``self.header(n)``
    appended to its first line. Items 6-9 of the header go to
    ``input/{name}_job_{n}.sh``, which is then made executable.
    '''
    word = 'opt'
    for n in range(len(self.smiles)):
        com_path = 'input/{name}_input_{n}.com'.format(name=self.name, n=n)

        # Check whether 'opt' is requested; if so start from the SMILES.
        if word in self.calc[2].lower().split():
            smi = self.smiles[n]
            smi.make3D(forcefield='mmff94', steps=50)
            try:
                os.mkdir(self.path + "/input")
            except OSError:
                # Usually "already exists". Other failures were ignored by
                # the original code too; the write below surfaces them.
                pass
            structures = [smi]
        else:
            structures = pybel.readfile(
                'g09',
                '{path}/log/opt_molecule_{n}.log'.format(path=self.path, n=n))

        for structure in structures:
            output = pybel.Outputfile('xyz', com_path, overwrite=True)
            try:
                output.write(structure)
            finally:
                # Must be closed (flushed) before the file is re-read below.
                output.close()

        with open(com_path, 'r') as file:
            lines = file.readlines()

        a = self.header(n)
        lines[1] = '\n'
        lines[0] += ''.join(a[i] for i in range(0, 6))
        lines[-1] += '\n'
        with open(com_path, 'w') as file:
            file.writelines(lines)

        with open('input/{name}_job_{n}.sh'.format(name=self.name, n=n),
                  'w') as file:
            file.write(a[6] + '\n' + a[7] + '\n' + a[8] + '\n' + a[9])

        # Argument list, not a shell string, so paths containing spaces or
        # shell metacharacters are passed through unchanged.
        subprocess.run([
            'chmod', 'a+x',
            '{path}/input/{name}_job_{n}.sh'.format(name=self.name,
                                                    path=self.path,
                                                    n=n)
        ])
        # input.com and job.sh have now been created
def gen_feature(path, name, featurizer):
    """Build coordinate, feature, atom and edge data for one complex.

    Reads ``<path>/<name>/<name>_ligand.mol2`` and ``..._pocket.mol2``,
    featurizes both, and truncates the pocket arrays to the atom count
    returned by ``pocket_atom_num_from_mol2``. If the sanity checks on the
    features fail, *name* is printed and processing continues.

    :return: dict with keys ``lig_co``, ``lig_fea``, ``lig_atoms``,
        ``lig_eg``, ``pock_co``, ``pock_fea``, ``pock_atoms``, ``pock_eg``.
    """
    charge_idx = featurizer.FEATURE_NAMES.index('partialcharge')

    ligand_file = '%s/%s/%s_ligand.mol2' % (path, name, name)
    ligand = next(pybel.readfile('mol2', ligand_file))
    ligand_coords, ligand_features = featurizer.get_features(ligand,
                                                             molcode=1)

    pocket_file = '%s/%s/%s_pocket.mol2' % (path, name, name)
    pocket = next(pybel.readfile('mol2', pocket_file))
    pocket_coords, pocket_features = featurizer.get_features(pocket,
                                                             molcode=-1)

    node_num = pocket_atom_num_from_mol2(name, path)
    pocket_coords = pocket_coords[:node_num]
    pocket_features = pocket_features[:node_num]

    try:
        assert (ligand_features[:, charge_idx] != 0).any()
        assert (pocket_features[:, charge_idx] != 0).any()
        assert (ligand_features[:, :9].sum(1) != 0).all()
    except BaseException:
        # Any failure here is only reported, never raised.
        print(name)

    # Atomic numbers of the heavy (non-hydrogen) atoms.
    lig_atoms = [a.atomicnum for a in ligand if a.atomicnum > 1]
    all_pock_atoms = [a.atomicnum for a in pocket if a.atomicnum > 1]

    # Everything past node_num must be oxygen.
    for extra in all_pock_atoms[node_num:]:
        assert extra == 8
    pock_atoms = all_pock_atoms[:node_num]

    assert len(lig_atoms) == len(ligand_features) and len(pock_atoms) == len(
        pocket_features)

    result = {}
    result['lig_co'] = ligand_coords
    result['lig_fea'] = ligand_features
    result['lig_atoms'] = lig_atoms
    result['lig_eg'] = gen_pocket_graph(ligand)
    result['pock_co'] = pocket_coords
    result['pock_fea'] = pocket_features
    result['pock_atoms'] = pock_atoms
    result['pock_eg'] = gen_pocket_graph(pocket)
    return result
def MolFormatConversion(input_file: str,
                        output_file: str,
                        input_format="xyz",
                        output_format="sdf"):
    """Convert every molecule in *input_file* to another format.

    :param input_file: path of the file to read.
    :param output_file: path of the file to write.
    :param input_format: Open Babel format code of the input.
    :param output_format: Open Babel format code of the output.

    Prints the number of molecules converted (0 for an empty input).
    """
    molecules = readfile(input_format, input_file)
    output_file_writer = Outputfile(output_format, output_file)
    # Initialised up front so an input without molecules reports 0 and does
    # not fail on an unbound loop variable.
    converted = 0
    try:
        for converted, molecule in enumerate(molecules, start=1):
            output_file_writer.write(molecule)
    finally:
        output_file_writer.close()
    print('%d molecules converted' % converted)
def getsmilesfromcdxml(filecdxml, lmolecules):
    """Store a SMILES string in each molecule dictionary of *lmolecules*.

    The molecules read from the CDXML file are paired with the
    dictionaries in ascending order of their integer ``'id'``: the k-th
    molecule in the file goes to the dictionary with the k-th smallest id.
    Each dictionary gets a ``'smile'`` entry; the list is modified in place.
    """
    # SMILES come out in fragment-id order, so rank the dictionaries by id.
    ids = np.asarray([int(entry['id']) for entry in lmolecules])
    rank_to_index = np.argsort(ids)

    for rank, fragment in enumerate(pybel.readfile("cdxml", filecdxml)):
        raw_smiles = fragment.write("smi")
        # Drop everything from the first tab on each line, then trim.
        cleaned = re.sub(r'\t.*', '', raw_smiles, flags=re.M).strip()
        target = lmolecules[rank_to_index[rank]]
        target['smile'] = cleaned
def coordinates(mol2_file):
    """Return the atom coordinates of the first molecule in a mol2 file.

    :param mol2_file: path to the mol2 file.
    :return: numpy array built from the ``coords`` of every atom, in order.
    """
    molecule = next(pybel.readfile("mol2", mol2_file))
    return np.array([atom.coords for atom in molecule.atoms])