def LogRead(self):
    """Convert optimised Gaussian logs to XYZ files and print each job log.

    For every index ``n`` of ``self.smiles``:

    * the molecules read from ``{self.path}/log/opt_molecule_{n}.log`` are
      written to ``xyz/data_{n}.xyz`` (relative to the current working
      directory); each molecule overwrites the previous one, so the file
      ends up holding the last molecule of the log;
    * ``log/{self.name}_molecule_{n}.log`` is read and its lines printed.
      When ``self.name`` is ``'sp'`` the first line is printed again.

    The original text was collapsed onto one line; the log-reading step is
    assumed to belong inside the per-molecule loop because it uses ``n``.
    """
    # See https://docs.python.org/3.1/tutorial/datastructures.html
    for n in range(len(self.smiles)):
        # The directory may already exist from a previous run; that is fine.
        os.makedirs(self.path + "/xyz", exist_ok=True)

        log_path = '{path}/log/opt_molecule_{n}.log'.format(path=self.path,
                                                            n=n)
        for molecule in pybel.readfile('g09', log_path):
            output = pybel.Outputfile('xyz',
                                      'xyz/data_{n}.xyz'.format(n=n),
                                      overwrite=True)
            try:
                output.write(molecule)
            finally:
                # Close explicitly so the data is flushed to disk.
                output.close()

        with open('log/{name}_molecule_{n}.log'.format(name=self.name, n=n),
                  'r') as file:
            lines = file.readlines()
        print(lines)

        if str(self.name) == 'sp':
            # NOTE(review): the original filter condition was always true, so
            # this is simply the first line of the log. It presumably was
            # meant to locate the energy line - confirm the intended search.
            energy = next(iter(lines))
            print(energy)
def Inputs(self):
    """Create a Gaussian input file and a job script for every molecule.

    For every index ``n`` of ``self.smiles``:

    * if the word ``opt`` occurs in ``self.calc[2]``, a 3D structure is
      generated from ``self.smiles[n]`` (MMFF94, 50 steps); otherwise the
      geometry is read from ``{self.path}/log/opt_molecule_{n}.log``;
    * the geometry is written in XYZ format to
      ``input/{self.name}_input_{n}.com``;
    * the first line of that file is extended with ``self.header(n)[0:6]``,
      the second line is blanked and a trailing newline is appended;
    * ``self.header(n)[6:10]`` is written to
      ``input/{self.name}_job_{n}.sh``, which is then made executable.

    Relative paths are resolved against the current working directory.
    """
    for n in range(len(self.smiles)):
        com_path = 'input/{name}_input_{n}.com'.format(name=self.name, n=n)

        # If "opt" is requested, compute the geometry from the SMILES;
        # otherwise reuse the geometry of the previous optimisation.
        if 'opt' in self.calc[2].lower().split():
            smi = self.smiles[n]
            smi.make3D(forcefield='mmff94', steps=50)
            # The directory may already exist from a previous run.
            os.makedirs(self.path + "/input", exist_ok=True)
            output = pybel.Outputfile('xyz', com_path, overwrite=True)
            try:
                output.write(smi)
            finally:
                # Must be closed (flushed) before the file is read back.
                output.close()
        else:
            log_path = '{path}/log/opt_molecule_{n}.log'.format(
                path=self.path, n=n)
            for molecule in pybel.readfile('g09', log_path):
                output = pybel.Outputfile('xyz', com_path, overwrite=True)
                try:
                    output.write(molecule)
                finally:
                    output.close()

        with open(com_path, 'r') as file:
            lines = file.readlines()

        # Build the new content before truncating the file, so a failure in
        # header() does not leave an empty input file behind.
        a = self.header(n)
        lines[1] = '\n'
        lines[0] += ''.join(a[i] for i in range(0, 6))
        lines[-1] += '\n'
        with open(com_path, 'w') as file:
            file.writelines(lines)

        # Create the job script next to the input file.
        with open('input/{name}_job_{n}.sh'.format(name=self.name, n=n),
                  'w') as file:
            file.write('\n'.join([a[6], a[7], a[8], a[9]]))

        # Argument list instead of a shell string: paths containing spaces or
        # shell metacharacters are passed through unchanged.
        subprocess.run([
            'chmod', 'a+x',
            '{path}/input/{name}_job_{n}.sh'.format(name=self.name,
                                                    path=self.path,
                                                    n=n)
        ])
def compute_properties(args):
    """Compute properties for each molecule and write them as SDF or table.

    Reads ``args.input`` (format ``args.iformat``) and, for every molecule
    with more than five heavy atoms, obtains its properties from
    ``cheminfolib.get_properties_ext``.

    * ``args.oformat == "sdf"``: the properties are attached to the molecule
      under their ``cheminfolib.ColumnNames`` titles and the molecule is
      written to ``args.output``.
    * otherwise: one tab-separated line of property values is written per
      molecule; if ``args.header`` is set, a header line derived from the
      first molecule of the input is written first.
    """
    if args.oformat == "sdf":
        outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True)
    else:
        outfile = open(args.output, "w")

    try:
        if args.header:
            # The header is derived from the properties of the first molecule.
            mol = next(pybel.readfile(args.iformat, args.input))
            metadata = cheminfolib.get_properties_ext(mol)
            outfile.write("%s\n" % "\t".join(
                [cheminfolib.ColumnNames[key] for key in metadata]))

        for mol in pybel.readfile(args.iformat, args.input):
            if mol.OBMol.NumHvyAtoms() <= 5:
                continue
            metadata = cheminfolib.get_properties_ext(mol)
            if args.oformat == "sdf":
                mol.data.update({
                    cheminfolib.ColumnNames[key]: metadata[key]
                    for key in metadata
                })
                outfile.write(mol)
            else:
                outfile.write(
                    "%s\n" %
                    ("\t".join([str(metadata[key]) for key in metadata])))
    finally:
        # Close the output even when reading or property calculation fails.
        outfile.close()
def print_output(args, rows):
    """Write database rows to ``args.output`` in the requested format.

    ``args.fetch`` is the string form of a list of field names, e.g.
    ``"['mw', 'logp']"``.

    * ``args.oformat == 'table'``: one tab-separated line per row (synonym
      followed by the requested fields), preceded by a header line when
      ``args.header`` is set.
    * ``'sdf'`` / ``'mol2'``: each ``row['mol']`` is parsed as SDF and
      written; for ``'sdf'`` the synonym and requested fields are attached
      as molecule data. Rows that cannot be processed are skipped.
    * anything else: ``row[args.oformat]`` and the synonym, tab-separated,
      one row per line (no trailing newline).
    """

    def requested_fields():
        # "".join(...) works on Python 2 and 3; the former
        # ``filter(...).split(', ')`` fails on Python 3, where ``filter``
        # returns an iterator instead of a string.
        cleaned = ''.join(
            ch for ch in args.fetch if ch not in ("[", "]", "'"))
        return cleaned.split(', ')

    if args.oformat == 'table':
        fields = requested_fields()
        with open(args.output, 'w') as outfile:
            if args.header:
                outfile.write(
                    'Identifier\t' +
                    '\t'.join([ColumnNames[key] for key in fields]) + '\n')
            for row in rows:
                outfile.write(
                    row['synonym'] + '\t' +
                    '\t'.join([str(row[key]) for key in fields]) + '\n')
    elif args.oformat in ['sdf', 'mol2']:
        keys = []
        if args.oformat == 'sdf':
            keys = requested_fields()
            if 'inchi_key' in keys:
                # The InChI key is stored as two separate columns.
                keys = (', '.join(keys).replace(
                    "inchi_key",
                    "inchi_key_first, inchi_key_last")).split(', ')
        outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True)
        try:
            for row in rows:
                try:
                    mol = pybel.readstring('sdf', row['mol'])
                    if args.oformat == 'sdf':
                        mol.data.update(
                            {ColumnNames['synonym']: row['synonym']})
                        for key in keys:
                            if key:
                                mol.data.update({ColumnNames[key]: row[key]})
                    outfile.write(mol)
                except Exception:
                    # Deliberate best effort: skip rows that cannot be
                    # parsed or annotated instead of aborting the export.
                    continue
        finally:
            outfile.close()
    else:
        with open(args.output, 'w') as outfile:
            outfile.write('\n'.join([
                '%s\t%s' % (row[args.oformat], row['synonym'])
                for row in rows
            ]))
def filter_by_name(args):
    """Write the molecules whose title appears in a list of names.

    Molecules are read from the SDF file ``args.input`` and written to
    ``args.output`` in format ``args.oformat``. ``args.list_of_names`` is a
    text file with one name per line; titles and names are compared after
    stripping surrounding whitespace. A molecule is written once for every
    matching line, so a name listed twice yields two copies.
    """
    outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True)
    try:
        # Read the name list once instead of reopening it (and leaking the
        # handle) for every molecule.
        with open(args.list_of_names) as names_file:
            wanted = [line.strip() for line in names_file]

        for mol in pybel.readfile("sdf", args.input):
            title = mol.title.strip()
            for name in wanted:
                if title == name:
                    outfile.write(mol)
    finally:
        outfile.close()
def patch_scores_sdf(sdf_in, outfile, scores):
    """Attach scores to the records of an SDF file and export SDF + TSV.

    Parameters
    ----------
    sdf_in : str
        Path of the SDF file to read.
    outfile : str
        Base name of the outputs; ``<work_dir>/<outfile>.sdf`` and
        ``<work_dir>/<outfile>.tsv`` are written (``work_dir`` is a module
        level name).
    scores : mapping
        Maps the zero-based record index to its score.

    Records with a score get the ``dls_deep_score`` data field, are written
    to the SDF file, and produce one TSV line
    ``index, SCORE, SCORE.norm, score`` (missing rDock fields become empty
    strings). Records without a score are only logged.
    """
    sdf_path = "{0}{1}{2}.sdf".format(work_dir, os.path.sep, outfile)
    tsv_path = "{0}{1}{2}.tsv".format(work_dir, os.path.sep, outfile)
    utils.log("Writing results to {0} and {1}".format(sdf_path, tsv_path))

    with open(tsv_path, 'w') as tsv_file:
        sdf_file = pybel.Outputfile("sdf", sdf_path)
        try:
            for counter, mol in enumerate(pybel.readfile("sdf", sdf_in)):
                if counter not in scores:
                    utils.log("No score found for record", counter)
                    continue
                score = scores[counter]
                mol.data['dls_deep_score'] = score
                rdock_score = mol.data['SCORE'] if 'SCORE' in mol.data else ''
                if 'SCORE.norm' in mol.data:
                    rdock_nscore = mol.data['SCORE.norm']
                else:
                    rdock_nscore = ''
                sdf_file.write(mol)
                tsv_file.write("{0}\t{1}\t{2}\t{3}\n".format(
                    counter, rdock_score, rdock_nscore, score))
        finally:
            # Close the SDF writer even if a record cannot be processed.
            sdf_file.close()
def merge_molecules_to_single_file(list_of_pybel_molecule_objects,
                                   output_filetype, output_filepath):
    """
    Create a single file containing several molecules.

    Parameters
    ----------
    list_of_pybel_molecule_objects : list of openbabel.pybel.Molecule
        List of molecule objects to be merged into a single file.
    output_filetype : str
        Type of the output file. Examples: 'sdf', 'pdb', 'pdbqt' etc.
        For a full list of acceptable file types, call pybel.outformats
    output_filepath : str or pathlib.Path
        Path of the output file including file name, but excluding extension.

    Returns
    -------
    pathlib.Path
        Full path (including extension) of the output file.
    """
    fullpath = Path(f"{output_filepath}.{output_filetype}")
    merged_molecule_file = pybel.Outputfile(output_filetype, str(fullpath))
    try:
        for pybel_molecule_object in list_of_pybel_molecule_objects:
            merged_molecule_file.write(pybel_molecule_object)
    finally:
        # Release the file handle even when writing a molecule fails.
        merged_molecule_file.close()
    return fullpath
def addh(args):
    """Re-add hydrogens to every molecule with more than five heavy atoms.

    Molecules are read from ``args.input`` and written to ``args.output``,
    both in format ``args.iformat``. Existing hydrogens are removed and new
    ones added via ``OBMol.AddHydrogens(args.polar, True, args.pH)``.
    Molecules with five or fewer heavy atoms are not written.
    """
    outfile = pybel.Outputfile(args.iformat, args.output, overwrite=True)
    try:
        for mol in pybel.readfile(args.iformat, args.input):
            if mol.OBMol.NumHvyAtoms() > 5:
                mol.removeh()
                mol.OBMol.AddHydrogens(args.polar, True, args.pH)
                outfile.write(mol)
    finally:
        # Close the output even when a molecule cannot be processed.
        outfile.close()
def remove_protonation(args):
    """Set the formal charge of every atom to zero and write the molecules.

    Molecules are read from ``args.input`` and written to ``args.output``,
    both in format ``args.iformat``.
    """
    outfile = pybel.Outputfile(args.iformat, args.output, overwrite=True)
    try:
        for mol in pybel.readfile(args.iformat, args.input):
            for atom in mol.atoms:
                atom.OBAtom.SetFormalCharge(0)
            if 'inchi' in mol.data:
                # remove inchi cache so modified mol is saved
                del mol.data['inchi']
            outfile.write(mol)
    finally:
        # Close the output even when a molecule cannot be processed.
        outfile.close()
def remove_ions(args):
    """Strip salts/ions from each molecule and write the remaining parent.

    Molecules are read from ``args.input`` and written to ``args.output``,
    both in format ``args.iformat``. Only molecules with more than five
    heavy atoms both before and after ``OBMol.StripSalts(0)`` are written.
    """
    outfile = pybel.Outputfile(args.iformat, args.output, overwrite=True)
    try:
        for mol in pybel.readfile(args.iformat, args.input):
            if mol.OBMol.NumHvyAtoms() <= 5:
                continue
            mol.OBMol.StripSalts(0)
            if 'inchi' in mol.data:
                # remove inchi cache so modified mol is saved (same handling
                # as the other molecule-modifying tools in this file)
                del mol.data['inchi']
            # Check if new small fragments have been created and remove them
            if mol.OBMol.NumHvyAtoms() > 5:
                outfile.write(mol)
    finally:
        # Close the output even when a molecule cannot be processed.
        outfile.close()
def write_molecules(self):
    """Write all images to ``path<step>.xyz`` in the parent directory.

    The working directory is switched to ``..`` for writing and back to
    ``temp`` afterwards. Each image's title is set to its name followed by
    `` Energy=<energy>`` before it is written.
    """
    # change directory and create xyz file
    os.chdir('..')
    try:
        outstream = pybel.Outputfile("xyz", "path" + str(self.step) + ".xyz")
        try:
            for mol in self.images:
                mol.pybelobj.OBMol.SetTitle(mol.name + " Energy=" +
                                            str(mol.energy))
                outstream.write(mol.pybelobj)
        finally:
            outstream.close()
    finally:
        # change to working directory, also when writing failed, so later
        # relative paths keep pointing into 'temp'
        os.chdir('temp')
def remove_ions(args):
    """Strip salts/ions from each molecule and write the remaining parent.

    Molecules are read from ``args.input`` and written to ``args.output``,
    both in format ``args.iformat``. Only molecules with more than five
    heavy atoms both before and after ``OBMol.StripSalts(0)`` are written.
    """
    outfile = pybel.Outputfile(args.iformat, args.output, overwrite=True)
    try:
        for mol in pybel.readfile(args.iformat, args.input):
            if mol.OBMol.NumHvyAtoms() <= 5:
                continue
            mol.OBMol.StripSalts(0)
            if 'inchi' in mol.data:
                # remove inchi cache so modified mol is saved
                del mol.data['inchi']
            # Check if new small fragments have been created and remove them
            if mol.OBMol.NumHvyAtoms() > 5:
                outfile.write(mol)
    finally:
        # Close the output even when a molecule cannot be processed.
        outfile.close()
def d3_viewer(self, molecule, viewer='avogadro'):
    """
    Opens the geometry of the molecule in a 3d viewer.

    Currently only avogadro is supported, but you can test it with other
    programs as well. The molecule is written to the temporary file
    ``.tmp.sdf`` in the current directory, which is removed again after the
    viewer returns.
    """
    mol = self.one_mol_from_sdf(molecule)
    try:
        output = pybel.Outputfile('sdf', ".tmp.sdf", overwrite=True)
        try:
            output.write(mol)
        finally:
            # Close before launching the viewer so the file is complete.
            output.close()
        call([viewer, '.tmp.sdf'])
    finally:
        # Clean up even if writing failed or the viewer could not start.
        if os.path.exists(".tmp.sdf"):
            os.remove(".tmp.sdf")
def eliminar_repetits(sdf_file):
    """Remove duplicate molecules from an SDF file.

    Molecules are considered duplicates when they produce the same InChI
    string; of several duplicates the last one read is kept. The unique
    molecules are written to ``<sdf_file without its last 4 chars>_uniques.sdf``.
    """
    # Keyed by InChI: later molecules replace earlier ones with the same key.
    unique_mols = {
        mol.write("inchi"): mol
        for mol in pybel.readfile("sdf", sdf_file)
    }
    outputsdf = pybel.Outputfile("sdf",
                                 str(sdf_file[:-4]) + "_uniques.sdf",
                                 overwrite=True)
    try:
        # .values() works on Python 2 and 3 (.itervalues() is Python 2 only).
        for mol in unique_mols.values():
            outputsdf.write(mol)
    finally:
        outputsdf.close()
def seperate(self):
    """
    Seperates the whole SD file into a file per solute

    A folder ``seperated_files_<YYYYmmddHHMMSS>`` is created in the current
    directory and one ``<title>.sdf`` file is written into it for every
    molecule of ``self.sdf_file``.

    NOTE(review): the working directory is changed to the new folder and
    not restored afterwards - confirm whether callers rely on that.
    """
    timestamp = time.strftime("%Y%m%d%H%M%S")
    folder = 'seperated_files_' + timestamp
    os.mkdir(folder)
    os.chdir(folder)
    for mol in self.sdf_file:
        singlefile = pybel.Outputfile("sdf",
                                      str(mol.title) + '.sdf',
                                      overwrite=True)
        try:
            singlefile.write(mol)
        finally:
            # Do not leak the handle when a molecule cannot be written.
            singlefile.close()
def filter_precalculated_compounds(args, filters):
    """Write the molecules whose stored properties satisfy all filters.

    Parameters
    ----------
    args : namespace
        Provides ``input`` (SDF path), ``output`` and ``oformat``.
    filters : dict
        Maps a property key to a ``(minimum, maximum)`` pair. Keys are
        translated through ``cheminfolib.ColumnNames`` when present there,
        otherwise used as-is as the SDF data field name.

    A molecule is written only if every filtered property lies within its
    inclusive range.
    """
    outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True)
    try:
        for mol in pybel.readfile('sdf', args.input):
            for key, elem in filters.items():
                # map the short description to the larger metadata names
                # stored in the sdf file
                prop_name = cheminfolib.ColumnNames.get(key, key)
                lower, upper = elem[0], elem[1]
                value = float(mol.data[prop_name])
                if not float(lower) <= value <= float(upper):
                    # one filter constraint is not satisfied: reject
                    break
            else:
                # no break: all filter rules are satisfied, save the compound
                outfile.write(mol)
    finally:
        outfile.close()
def patch_scores_sdf(outfile, scores):
    """Attach scores to ligand records and write them to one SDF file.

    For every directory in the module-level ``paths`` the file
    ``ligands.sdf`` is read; records are numbered consecutively across all
    files, starting at zero. A record whose number is a key of ``scores``
    gets the ``TransFSScore`` data field and is written to
    ``<work_dir>/<outfile>``; other records are only logged.
    """
    counter = 0
    sdf_path = "{0}{1}{2}".format(work_dir, os.path.sep, outfile)
    log("Writing results to {0}".format(sdf_path))
    sdf_file = pybel.Outputfile("sdf", sdf_path)
    try:
        for path in paths:
            ligands = os.path.sep.join([path, 'ligands.sdf'])
            for mol in pybel.readfile("sdf", ligands):
                if counter in scores:
                    mol.data['TransFSScore'] = scores[counter]
                    sdf_file.write(mol)
                else:
                    log("No score found for record", counter)
                counter += 1
    finally:
        # Close the writer even if one of the ligand files cannot be read.
        sdf_file.close()
def process(input, output):
    """Convert a PDBQT file to SDF, copying score data out of the REMARK.

    For molecules carrying ``REMARK`` data, the first remark line is split
    on whitespace and tokens 2, 3 and 4 are stored as the ``SCORE``,
    ``RMSD_LB`` and ``RMSD_UB`` properties. Every molecule is written to
    ``output`` (assumed reading of the collapsed original: the write is not
    conditional on the REMARK).
    """
    docked = pybel.readfile("pdbqt", input)
    sdf = pybel.Outputfile("sdf", output, overwrite=True)
    try:
        for mol in docked:
            if mol.OBMol.HasData("REMARK"):
                remark = mol.OBMol.GetData("REMARK").GetValue()
                tokens = remark.splitlines()[0].split()
                # add the score property
                add_property(mol, "SCORE", tokens[2])
                # add the first RMSD property
                add_property(mol, "RMSD_LB", tokens[3])
                # add the second RMSD property
                add_property(mol, "RMSD_UB", tokens[4])
            sdf.write(mol)
    finally:
        # Close the output even when a remark cannot be parsed.
        sdf.close()
def runvina(infile, outfile, receptor, tmp_file='test.pdbqt', vina=None):
    """Rescore every ligand of an SDF file with ``vina --score_only``.

    Parameters
    ----------
    infile : str
        SDF file with the ligands.
    outfile : str
        SDF file to write; overwritten if it exists.
    receptor : str
        Receptor file passed to vina via ``--receptor``.
    tmp_file : str
        Scratch PDBQT file each ligand is converted to before scoring.
    vina : str
        Path of the vina executable.

    Each ligand is written to ``outfile``; when scoring succeeds the value
    returned by ``get_aff`` is stored in the ``AutodockVinaRescoreOnly``
    data field, otherwise the error is printed and the ligand is written
    without a score. Ligands that cannot be converted to PDBQT are skipped.
    """
    obconversion = OBConversion()
    obconversion.SetInFormat("sdf")
    obconversion.SetOutFormat("pdbqt")

    obmol = OBMol()
    notatend = obconversion.ReadFile(obmol, infile)
    # Copy taken before the PDBQT conversion; this copy is what gets written
    # to the SDF output.
    obmol2 = OBMol(obmol)

    ofs = pybel.Outputfile("sdf", outfile, overwrite=True)
    pbar = tqdm()
    try:
        while notatend:
            pbar.update(1)
            if obconversion.WriteFile(obmol, tmp_file):
                try:
                    x = subprocess.check_output([
                        vina, "--score_only", "--receptor", receptor,
                        "--ligand", tmp_file
                    ],
                                                shell=False)
                    mol2 = pybel.Molecule(obmol2)
                    mol2.data.update(
                        {'AutodockVinaRescoreOnly': str(get_aff(x))})
                    ofs.write(mol2)
                except (subprocess.CalledProcessError, ValueError) as e:
                    print(e)
                    # pybel.Outputfile.write() needs a pybel.Molecule, not a
                    # raw OBMol; write the unscored ligand.
                    ofs.write(pybel.Molecule(obmol2))
            else:
                print("error writing")

            obmol = OBMol()
            notatend = obconversion.Read(obmol)
            obmol2 = OBMol(obmol)
    finally:
        pbar.close()
        # The writer was never closed before, so output could stay unflushed.
        ofs.close()
    # NOTE(review): printed unconditionally after a normal run - looks like
    # a debugging leftover; confirm before removing.
    print("FAILED")
def write_pdbqt(self):
    """Convert the object's PDB file to PDBQT and return the new path.

    If ``self.pdb_path`` is not set, ``self.write_pdb()`` is called first.
    The PDBQT file is written next to the PDB file (same name, ``.pdbqt``
    suffix); afterwards the PDB file is deleted, the ``pdb_path`` attribute
    removed and ``cmd.reinitialize()`` called.
    """
    if not hasattr(self, 'pdb_path'):
        self.write_pdb()
    self.pdbqt_path = self.pdb_path.with_suffix('.pdbqt')

    # Read everything up front; the PDB file is deleted further below.
    mols = list(pybel.readfile('pdb', str(self.pdb_path)))
    writer = pybel.Outputfile('pdbqt',
                              str(self.pdbqt_path),
                              opt={'pdbqt': '-xh'},
                              overwrite=True)
    try:
        for molecule in mols:
            writer.write(molecule)
    finally:
        # Close the writer even when a molecule cannot be written.
        writer.close()

    os.remove(str(self.pdb_path))
    delattr(self, 'pdb_path')
    cmd.reinitialize()
    return self.pdbqt_path
def print_output(args, rows):
    """Write database rows to ``args.output`` in the requested format.

    ``args.fetch`` is the string form of a list of field names, e.g.
    ``"['mw', 'logp']"``.

    * ``args.oformat == "table"``: one tab-separated line per row (synonym
      followed by the requested fields), preceded by a header line when
      ``args.header`` is set.
    * ``"sdf"`` / ``"mol2"``: each ``row["mol"]`` is parsed as SDF and
      written; for ``"sdf"`` the synonym and requested fields are attached
      as molecule data. Rows that cannot be processed are skipped.
    * anything else: ``row[args.oformat]`` and the synonym, tab-separated,
      one row per line (no trailing newline).
    """

    def requested_fields():
        # ``filter(...)`` returns an iterator on Python 3, so the former
        # ``filter(...).split(", ")`` raised AttributeError; build the
        # cleaned string explicitly instead.
        cleaned = "".join(
            ch for ch in args.fetch if ch not in ("[", "]", "'"))
        return cleaned.split(", ")

    if args.oformat == "table":
        fields = requested_fields()
        with open(args.output, "w") as outfile:
            if args.header:
                outfile.write(
                    "Identifier\t" +
                    "\t".join([ColumnNames[key] for key in fields]) + "\n")
            for row in rows:
                outfile.write(
                    row["synonym"] + "\t" +
                    "\t".join([str(row[key]) for key in fields]) + "\n")
    elif args.oformat in ["sdf", "mol2"]:
        keys = []
        if args.oformat == "sdf":
            keys = requested_fields()
            if "inchi_key" in keys:
                # The InChI key is stored as two separate columns.
                keys = (", ".join(keys).replace(
                    "inchi_key",
                    "inchi_key_first, inchi_key_last")).split(", ")
        outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True)
        try:
            for row in rows:
                try:
                    mol = pybel.readstring("sdf", row["mol"])
                    if args.oformat == "sdf":
                        mol.data.update(
                            {ColumnNames["synonym"]: row["synonym"]})
                        for key in keys:
                            if key:
                                mol.data.update({ColumnNames[key]: row[key]})
                    outfile.write(mol)
                except Exception:
                    # Deliberate best effort: skip rows that cannot be
                    # parsed or annotated instead of aborting the export.
                    continue
        finally:
            outfile.close()
    else:
        with open(args.output, "w") as outfile:
            outfile.write("\n".join([
                "%s\t%s" % (row[args.oformat], row["synonym"])
                for row in rows
            ]))
def convert(self):
    """Prepare this structure file and convert it to PDBQT.

    Returns the cached ``self.path_pdbqt`` if it is already set. Otherwise
    the file is loaded through ``cmd``, waters (``resn HOH``) are removed,
    hydrogens are added to acceptors/donors and the result is saved back to
    the same path. The molecules of ``self.path_clean`` are then written to
    a ``.pdbqt`` file next to the original, ``cmd`` is reinitialised and
    the original file is deleted.

    NOTE(review): ``self.path_clean`` is not set in this method; it is
    presumably provided elsewhere on the class - confirm.
    """
    if hasattr(self, 'path_pdbqt'):
        return self.path_pdbqt

    self.path_pdbqt = Protein(self.with_suffix('.pdbqt'))
    # The converted object points at itself so that calling convert() on it
    # returns immediately.
    setattr(self.path_pdbqt, 'path_pdbqt', self.path_pdbqt)

    cmd.load(str(self))
    cmd.remove('resn HOH')
    cmd.h_add(selection='acceptors or donors')
    cmd.save(str(self))

    mols = list(pybel.readfile('pdb', str(self.path_clean)))
    writer = pybel.Outputfile('pdbqt',
                              str(self.path_pdbqt),
                              opt={'pdbqt': '-xh'})
    try:
        for molecule in mols:
            writer.write(molecule)
    finally:
        # Close the writer even when a molecule cannot be written.
        writer.close()

    cmd.reinitialize()
    os.remove(self)
    return self.path_pdbqt
def main():
    """Command line entry point: set molecule titles from an SDF data field.

    Every molecule of ``--infile`` is written to ``--outfile``. If the
    molecule has the data field named by ``--key``, its title is replaced
    by that field's value and, with ``--random``, a ``__`` plus 13 random
    lowercase/digit characters is appended.
    """
    parser = argparse.ArgumentParser(
        # argparse re-wraps the description, so splitting the literal here
        # does not change the rendered help text.
        description="Change the title from a molecule file to metadata "
        "value of a given-id of the same molecule file.", )
    parser.add_argument("--infile",
                        "-i",
                        required=True,
                        help="path to the input file")
    parser.add_argument("--outfile",
                        "-o",
                        required=True,
                        help="path to the output file")
    parser.add_argument(
        "--key",
        "-k",
        required=True,
        help=
        "the metadata key from the sdf file which should inlcude the new title",
    )
    parser.add_argument("--random",
                        "-r",
                        action="store_true",
                        help="Add random suffix to the title.")
    args = parser.parse_args()

    alphabet = string.ascii_lowercase + string.digits
    output = pybel.Outputfile("sdf", args.outfile, overwrite=True)
    try:
        for mol in pybel.readfile("sdf", args.infile):
            if args.key in mol.data:
                mol.title = mol.data[args.key]
                if args.random:
                    suffix = "".join(
                        random.choice(alphabet) for _ in range(13))
                    mol.title += "__%s" % suffix
            output.write(mol)
    finally:
        # Close the output even when reading the input fails midway.
        output.close()
def add_property(self, molecule, prop_name, prop_value, name='solvatum.sdf'):
    """
    Adding further properties to the database.

    All molecules of ``self.sdf_file`` are written to ``<self.path>/<name>``
    (overwriting it); the molecule whose title equals the resolved
    ``molecule`` identifier gets ``prop_name`` set to ``prop_value``.

    Raises
    ------
    TypeError
        If ``molecule`` is not a string.
    """
    if not isinstance(molecule, str):
        raise TypeError('Molecule has to be given as a string')
    molecule = self.__name_id_handler(molecule)
    output = pybel.Outputfile('sdf',
                              pathjoin(self.path, name),
                              overwrite=True)
    try:
        for mol in self.sdf_file:
            if molecule == mol.title:
                mol.data[prop_name] = prop_value
            output.write(mol)
    finally:
        # Close the database file even when a molecule cannot be written.
        output.close()
def mol_to_ase(self, molecule):
    """
    Return the geometry of a molecule as ASE Atoms object.

    ASE has to be installed (it is even enough to clone the git repo
    https://gitlab.com/ase/ase and set the Python path).
    More informations: https://wiki.fysik.dtu.dk/ase/

    Returns ``None`` (after printing a hint) when the ``ase`` module has not
    been imported. The molecule is round-tripped through the temporary file
    ``.tmp.sdf`` in the current directory, which is removed afterwards.
    """
    if "ase" not in sys.modules:
        print(
            r"You have to install ASE before you can use this feature." +
            "\n"
            r"Go to https://wiki.fysik.dtu.dk/ase/ for more informations.")
        return None

    mol = self.one_mol_from_sdf(molecule)
    try:
        output = pybel.Outputfile('sdf', ".tmp.sdf", overwrite=True)
        try:
            output.write(mol)
        finally:
            # Close before reading the file back so it is complete on disk.
            output.close()
        ase_atoms = sdf.read_sdf(".tmp.sdf")
    finally:
        # Clean up even if writing or parsing failed.
        if os.path.exists(".tmp.sdf"):
            os.remove(".tmp.sdf")
    return ase_atoms
def Mold2(
    mol: Union[Union[pybel.Molecule, Chem.Mol],
               List[Union[pybel.Molecule, Chem.Mol]]]
) -> dict:
    """Calculate molecular descriptors with Mold2.

    The molecules are written to an SD file in a temporary directory, the
    Mold2 executable configured under ``Mold2`` in the external tools is run
    on it through the shell, and the resulting table (without its
    ``Number`` column) is returned. The temporary directory is disposed of
    whether or not the run succeeds.

    :param mol: either one or multiple molecules the descriptors will be
        calculated from; all must be pybel molecules or all RDKit molecules.
    :return: one dictionary of descriptor values per processed molecule.
        NOTE: despite the annotation, the value is a list of dictionaries
        (``DataFrame.to_dict(orient='records')``), also for one molecule.
    :raises NotImplementedError: if Mold2 is not configured.
    :raises ValueError: if the molecules are of an unsupported or mixed type.
    :raises RuntimeError: if the platform is neither win32 nor linux, or if
        the number of results differs from the number of molecules written.
    """
    # Verify configuration of Mold2
    etp = ExternalToolsParser(path=None,
                              required_fields=['path'],
                              skip_errors=False)
    if 'Mold2' not in etp.tools.keys():
        raise NotImplementedError("Mold2 is not set up.")

    # Check types
    is_list = isinstance(mol, list)
    if is_list:
        is_pybel = all(map(lambda x: isinstance(x, pybel.Molecule), mol))
        is_rdkit = all(map(lambda x: isinstance(x, Chem.Mol), mol))
        if not (is_pybel or is_rdkit):
            raise ValueError(
                'All molecules must have the same type: either rdkit or pybel.'
            )
    else:
        is_pybel = isinstance(mol, pybel.Molecule)
        is_rdkit = isinstance(mol, Chem.Mol)
        if not (is_pybel or is_rdkit):
            raise ValueError(
                'molecule must either be rdkit or pybel molecule.')

    # Keep trace of molecules being processed
    out_indices = []

    # Create temporary folder; it is disposed of in the ``finally`` below so
    # that no failure path leaves it behind.
    running_dir = tempfile.mkdtemp()
    try:
        # Write v2000 SD file
        sdf_file = os.path.realpath(os.path.join(running_dir,
                                                 'molecules.sdf'))
        writer = pybel.Outputfile('sdf', sdf_file, overwrite=True)
        try:
            if is_list:
                for i, mol_ in enumerate(mol):
                    try:
                        if is_pybel:
                            writer.write(mol_)
                        else:
                            writer.write(
                                pybel.readstring('mol',
                                                 Chem.MolToMolBlock(mol_)))
                        out_indices.append(i)
                    except IOError:
                        # Molecules that cannot be written are left out.
                        pass
            else:
                out_indices.append(0)
                if is_pybel:
                    writer.write(mol)
                else:
                    writer.write(
                        pybel.readstring('mol', Chem.MolToMolBlock(mol)))
        finally:
            writer.close()

        # Prepare path to executable file
        params = etp.tools['Mold2']
        if params['path'].startswith('.'):
            # Relative tool paths are resolved against this module.
            path_prefix = os.path.realpath(
                os.path.join(os.path.dirname(__file__), params['path']))
        else:
            path_prefix = os.path.realpath(params['path'])
        if platform.startswith('win32'):
            mold2_bin = os.path.join(path_prefix, params['win_bin'])
            log_file = 'NUL'
            echo_cmd = 'echo.'
        elif platform.startswith('linux'):
            log_file = '/dev/null'
            echo_cmd = 'echo -e \'\n\''
            if architecture()[0].startswith('32'):
                mold2_bin = os.path.join(path_prefix, params['lnx_bin'])
            else:
                mold2_bin = os.path.join(path_prefix, params['lnx64_bin'])
        else:
            raise RuntimeError(f'Platform ({platform}) not supported.')

        # Call Mold2 descriptors on output file
        out_file = os.path.join(running_dir, 'molecules.mold2descriptors.tsv')
        mold2_bin = os.path.realpath(mold2_bin)  # Ensure separators are correct
        # The echo output is piped into Mold2's standard input.
        command = f'{echo_cmd} | {mold2_bin} -i {sdf_file} -o {out_file} -r {log_file}'
        with open(os.devnull, 'wb') as devnull:
            _ = subprocess.check_output(command, shell=True,
                                        stderr=devnull)  # noqa: S602
        # Get results
        data = pd.read_table(out_file).drop('Number', axis=1)
    finally:
        Dispose(running_dir)  # Remove temp dir

    results = data.to_dict(orient='records')
    if len(results) != len(out_indices):
        raise RuntimeError(
            f'Mold2 results contained {len(results)} but {len(out_indices)} were expected'
        )
    return results
def execute(ligands_sdf, points_file, outfile):
    """Annotate ligands with their minimum distance to each reference point.

    For every molecule, the heavy-atom coordinates (hydrogens removed on a
    copy) are compared with every point; the smallest Euclidean distance to
    point number ``k`` (1-based) is stored in the data field
    ``distance<k>``. Molecules that cannot be processed are logged and not
    written.

    :param ligands_sdf: A SDF with the 3D molecules to test
    :param points_file: A file with the points to consider: three
        whitespace-separated floats per line. Other lines are logged as
        failures and ignored.
    :param outfile: The name of the file for the SDF output
    :return: None
    """
    points = []

    # read the points
    with open(points_file, "r") as f:
        for line in f:
            # The stripped value has to be assigned; str.strip() returns a
            # new string and leaves ``line`` untouched.
            line = line.strip()
            if line:
                p = line.split()
                if len(p) == 3:
                    points.append((float(p[0]), float(p[1]), float(p[2])))
                    log("Read points", p)
                    continue
            log("Failed to read line:", line)
    log("Found", len(points), "atom points")

    sdf_writer = pybel.Outputfile("sdf", outfile, overwrite=True)

    count = 0
    written = 0
    try:
        for mol in pybel.readfile("sdf", ligands_sdf):
            count += 1
            if count % 50000 == 0:
                log("Processed", count)

            try:
                # Work on a copy so the written molecule keeps its hydrogens.
                clone = pybel.Molecule(mol)
                clone.removeh()
                coords = [atom.coords for atom in clone.atoms]

                for p, point in enumerate(points, start=1):
                    # distance based on cartesian coordinates; min() raises
                    # ValueError for a molecule without atoms, which is
                    # reported below
                    min_distance = min(
                        math.sqrt((point[0] - i[0])**2 +
                                  (point[1] - i[1])**2 +
                                  (point[2] - i[2])**2) for i in coords)
                    mol.data["distance" + str(p)] = min_distance

                sdf_writer.write(mol)
                written += 1

            except Exception as e:
                log("Failed to handle molecule: " + str(e))
                continue
    finally:
        sdf_writer.close()

    # Report what was actually written, not what was read.
    log("Wrote", written, "molecules")
# Write one mol2 file per row of ``df``: the slice of ``lines`` between the
# row's start and end index (inclusive), named after its ZINC identifier.
for j, row in df.iterrows():
    newmol = lines[row["start_idx"]:(row["end_idx"] + 1)]
    with open(f"{row['ZINC']}.mol2", "w+") as f:
        f.writelines(newmol)

# Organize paths
root = os.getcwd()
print(f"The root directory is {root}")

mol2dir = os.path.join(root, "zzz.problematic_molecules")
dirname = all_mols_file.split(".")[0]
exptdir = os.path.join(mol2dir, dirname)
print(f"mol2 files will be stored at {exptdir}")

# Move mol2 files to their directory
os.makedirs(exptdir, exist_ok=True)
all_mol2 = glob.glob("*.mol2")
# The combined input file stays where it is; it is only part of the glob
# result when it lives in the current directory.
if all_mols_file in all_mol2:
    all_mol2.remove(all_mols_file)
for elem in all_mol2:
    shutil.move(os.path.join(root, elem), os.path.join(exptdir, elem))

# Change directory and create SDF files
os.chdir(exptdir)
try:
    for elem in all_mol2:
        for mol in pb.readfile("mol2", elem):
            # elem[:-5] drops the ".mol2" extension
            outsdf = pb.Outputfile("sdf", f"{elem[:-5]}.sdf", overwrite=True)
            try:
                outsdf.write(mol)
            finally:
                outsdf.close()
finally:
    # Always return to the starting directory, also after a failure.
    os.chdir(root)
def write_json_as_sdf(jsonfile, sdfile):
    """Write every molecule of a JSON molecule file to an SDF file.

    The input is opened with ``obabel_utils.default_open_input`` using the
    ``'json'`` format; each molecule it supplies is written to ``sdfile``.
    """
    # NOTE(review): the first returned value (presumably the input handle)
    # was never used or closed here - confirm whether it needs closing.
    input_handle, suppl = obabel_utils.default_open_input(jsonfile, 'json')
    # pybel.Outputfile only documents write() and close(), no context
    # manager support, so close it explicitly instead of using ``with``.
    output = pybel.Outputfile('sdf', sdfile)
    try:
        for mol in suppl:
            output.write(mol)
    finally:
        output.close()
def JCompoundMapper(mol: Union[Union[pybel.Molecule, Chem.Mol],
                               List[Union[pybel.Molecule, Chem.Mol]]],
                    bits: int = 2048) -> dict:
    """Calculate molecular fingerprints with JCompoundMapper.

    Java must be installed and its path in the PATH environment variable.
    The molecules are written to an SD file in a temporary directory and
    JCompoundMapper is run once per fingerprint type. The temporary
    directory is disposed of whether or not the run succeeds.

    :param mol: either one or multiple molecules the fingerprints will be
        calculated from; all must be pybel molecules or all RDKit molecules.
    :param bits: length of the bitstring.
    :return: for a single molecule, a dictionary mapping each fingerprint
        name to the list of vectors read from the tool's output; for a list
        of molecules, a list with one such dictionary per molecule (each
        value being that molecule's vector). Fingerprints for which the
        tool fails map to empty lists and a warning is issued.
    :raises NotImplementedError: if JCompoundMapper is not configured.
    :raises ValueError: if the molecules are of an unsupported or mixed type.
    :raises RuntimeError: if a result file does not contain one entry per
        molecule written.
    """
    # Verify configuration of JCompoundMapper
    etp = ExternalToolsParser(path=None,
                              required_fields=['path'],
                              skip_errors=False)
    if 'JCompoundMapper' not in etp.tools.keys():
        raise NotImplementedError("JCompoundMapper is not set up.")
    params = etp.tools['JCompoundMapper']
    jar_path = os.path.realpath(
        os.path.join(os.path.dirname(__file__), params['path'],
                     params['bin']))

    # Check types
    is_list = isinstance(mol, list)
    if is_list:
        is_pybel = all(map(lambda x: isinstance(x, pybel.Molecule), mol))
        is_rdkit = all(map(lambda x: isinstance(x, Chem.Mol), mol))
        if not (is_pybel or is_rdkit):
            raise ValueError(
                'All molecules must have the same type: either rdkit or pybel.'
            )
    else:
        is_pybel = isinstance(mol, pybel.Molecule)
        is_rdkit = isinstance(mol, Chem.Mol)
        if not (is_pybel or is_rdkit):
            raise ValueError(
                'molecule must either be rdkit or pybel molecule.')

    # Keep trace of molecules being processed
    out_indices, results = [], {}

    # Create temporary folder; it is disposed of in the ``finally`` below so
    # that no failure path leaves it behind.
    running_dir = tempfile.mkdtemp()
    try:
        sdf_file = os.path.join(running_dir, 'molecules.sdf')
        if is_pybel:
            outputmol = pybel.Outputfile('sdf', sdf_file, overwrite=True)
        else:  # RDKit molecule
            outputmol = Chem.rdmolfiles.SDWriter(sdf_file)

        # Write molecule(s)
        try:
            if is_list:
                for i, mol_ in enumerate(mol):
                    try:
                        outputmol.write(mol_)
                        out_indices.append(i)
                    except IOError:
                        # Molecules that cannot be written are left out.
                        pass
            else:
                out_indices.append(0)
                outputmol.write(mol)
        finally:
            outputmol.close()

        # Call JCompoundMapper on output file for each fingerprint type
        fingerprints = [
            'DFS', 'ASP', 'AP2D', 'AT2D', 'CATS2D', 'PHAP2POINT2D',
            'PHAP3POINT2D', 'SHED', 'RAD2D', 'LSTAR', 'AP3D', 'AT3D',
            'CATS3D', 'PHAP2POINT3D', 'PHAP3POINT3D', 'RAD3D'
        ]
        for fingerprint in fingerprints:
            out_file = os.path.join(running_dir, f'fp_{fingerprint}.txt')
            # Argument list: with shell=False a single command string is
            # taken as the program name on POSIX and cannot be executed.
            command = [
                'java', '-jar', jar_path, '-f', sdf_file, '-c', fingerprint,
                '-ff', 'LIBSVM_SPARSE', '-o', out_file, '-hs',
                str(bits)
            ]
            retcode = subprocess.call(command, shell=False)  # noqa: S603
            if retcode:  # Error occured
                warnings.warn(
                    f'JCompoundMapper did not succeed to run properly for fingerprint {fingerprint}.'
                )
                results[fingerprint] = [[] for _ in range(len(out_indices))]
            else:
                fp_vectors = read_libsvmsparse(out_file, bits)
                # Get results
                if len(fp_vectors) != len(out_indices):
                    # Not all processed molecules in result
                    raise RuntimeError(
                        f'JCompoundMapper results contained {len(fp_vectors)}'
                        f' molecules but {len(out_indices)} were expected')
                results[fingerprint] = fp_vectors
    finally:
        Dispose(running_dir)

    if is_list:
        # Transform a dictionary of lists to a list of dictionaries
        results = [dict(zip(results, t)) for t in zip(*results.values())]
    return results