def extract_one(extractor, s1=None, s2=None):
    """
    Plot histograms of free energy differences between paired reactions.

    The reaction pairs come from ``extractor.group_by_reactant_charge_pair()``
    and are handled separately for the charge pairs (-1, 0), (-1, 1) and
    (0, 1). For each charge pair, the difference "second reaction minus first
    reaction" of the free energies is collected over all pairs and handed to
    ``plot_hist`` together with the output PDF path.

    Args:
        extractor: object providing ``group_by_reactant_charge_pair()``; the
            returned container is indexed by a charge pair and yields 2-tuples
            of reactions that each provide ``get_free_energy()``.
        s1: first label; used in the output file name and passed to
            ``plot_hist``.
        s2: second label; used in the same way as ``s1``.
    """
    output_dir = "~/Applications/db_access/mol_builder/reactant_e_diff/"
    charge_pairs = ((-1, 0), (-1, 1), (0, 1))

    grouped = extractor.group_by_reactant_charge_pair()
    for low, high in charge_pairs:
        energy_diffs = [
            second.get_free_energy() - first.get_free_energy()
            for first, second in grouped[(low, high)]
        ]

        pdf_name = output_dir + "reactant_e_diff_{}{}_{}_{}.pdf".format(
            s1, s2, low, high
        )
        # Make sure the target location exists before plotting into it.
        create_directory(pdf_name)
        plot_hist(energy_diffs, to_path(pdf_name), s1, s2, low, high)
def plot_molecules(self, prefix=None):
    """
    Plot molecules to .png and write .sdf and .pdb files.

    Every file is produced twice, under two naming schemes. In the
    ``mol_<ext>`` directories the file name starts with the formula
    (``formula_charge_id_energy``); in the ``mol_<ext>_id`` directories it
    starts with the id (``id_formula_charge_energy``). The "." in the free
    energy is replaced by "dot" so that it does not look like an extension.

    Args:
        prefix (Path): directory path for the created files. It is created if
            it does not exist. If `None` (default), the current working
            directory at the time of the call is used.

    Raises:
        ValueError: if `prefix` exists but is not a directory.
    """
    # Resolve the default at call time. A `Path.cwd()` default in the signature
    # is evaluated only once, when the function is defined, and would ignore
    # any later change of the working directory.
    if prefix is None:
        prefix = Path.cwd()
    prefix = to_path(prefix)

    if not prefix.exists():
        create_directory(prefix, path_is_directory=True)
    elif not prefix.is_dir():
        raise ValueError(
            f"Expect `prefix` be a path to a directory, but got {prefix}"
        )

    # All output directories are created once, up front, so the per-file
    # copies below need no further directory handling.
    formats = ("png", "sdf", "pdb")
    for ext in formats:
        create_directory(prefix.joinpath(f"mol_{ext}"), path_is_directory=True)
        create_directory(prefix.joinpath(f"mol_{ext}_id"), path_is_directory=True)

    for m in self.molecules:
        energy = str(m.free_energy).replace(".", "dot")
        by_formula = f"{m.formula}_{m.charge}_{m.id}_{energy}"
        by_id = f"{m.id}_{m.formula}_{m.charge}_{energy}"

        for ext in formats:
            fname1 = prefix.joinpath(f"mol_{ext}/{by_formula}.{ext}")
            if ext == "png":
                m.draw(filename=fname1, show_atom_idx=True)
            else:
                m.write(fname1, format=ext)

            fname2 = prefix.joinpath(f"mol_{ext}_id/{by_id}.{ext}")
            shutil.copyfile(fname1, fname2)
def write(self, filename=None, name=None, format="sdf", kekulize=True, v3000=True):
    """Write a molecule to file or as string using rdkit.

    Args:
        filename (str): name of the file to write the output. If None, return
            the output as string.
        name (str): name of a molecule, stored as the `_Name` property of the
            rdkit molecule. If `format` is sdf, this is the first line the
            molecule block in the sdf. If None, `str(self.id)` is used.
        format (str): format of the molecule, supporting: sdf, pdb, and smi.
        kekulize (bool): whether to kekulize the mol if format is `sdf`
        v3000 (bool): whether to force v3000 form if format is `sdf`

    Returns:
        For `sdf` and `pdb`: the molecule block followed by a "$$$$" line if
        `filename` is None, otherwise whatever the rdkit file writer returns.
        For `smi`: the SMILES string; if `filename` is given, the SMILES is
        additionally written to that file as a single line.

    Raises:
        ValueError: if `format` is not supported. This is checked before any
            directory is created or the molecule name is modified.
    """
    # Reject unknown formats up front so that a bad call has no side effects.
    if format not in ("sdf", "pdb", "smi"):
        raise ValueError(f"format {format} currently not supported")

    if filename is not None:
        create_directory(filename)
        filename = str(to_path(filename))

    name = str(self.id) if name is None else name
    self.rdkit_mol.SetProp("_Name", name)

    if format == "sdf":
        if filename is None:
            mol_block = Chem.MolToMolBlock(
                self.rdkit_mol, kekulize=kekulize, forceV3000=v3000
            )
            return mol_block + "$$$$\n"
        return Chem.MolToMolFile(
            self.rdkit_mol, filename, kekulize=kekulize, forceV3000=v3000
        )

    if format == "pdb":
        if filename is None:
            # NOTE(review): "$$$$" is the SDF record delimiter and is not part
            # of the PDB format. It is kept because callers may rely on the
            # current output; confirm whether it can be dropped.
            pdb_block = Chem.MolToPDBBlock(self.rdkit_mol)
            return pdb_block + "$$$$\n"
        return Chem.MolToPDBFile(self.rdkit_mol, filename)

    # format == "smi"
    smiles = Chem.MolToSmiles(self.rdkit_mol)
    if filename is not None:
        # Previously the directory was created but no file was written.
        with open(filename, "w") as f:
            f.write(smiles + "\n")
    return smiles
def draw_with_bond_note(self, bond_note, filename="mol.png", show_atom_idx=True):
    """
    Draw molecule using rdkit and show bond annotation, e.g. bond energy.

    The annotated bonds are highlighted in light grey and the image is drawn
    on a 400x300 canvas with the font size reduced to 80% of the default.

    Args:
        bond_note (dict): {bond_index: note}. The note to show for the
            corresponding bond. `bond_index` holds the indices of the two
            atoms forming the bond. Float notes are formatted with 3
            significant digits; any other non-string note is converted with
            `str`.
        filename (str): path to the save the generated image. The image is
            always written; a path is required.
        show_atom_idx (bool): forwarded to `draw`, which prepares the molecule
            that is annotated here.

    Raises:
        ValueError: if a key of `bond_note` does not refer to an existing bond.
    """
    m = self.draw(show_atom_idx=show_atom_idx)

    # set bond annotation
    highlight_bonds = []
    for bond, note in bond_note.items():
        if isinstance(note, (float, np.floating)):
            note = "{:.3g}".format(note)
        elif not isinstance(note, str):
            # rdkit's SetProp takes a string value, so e.g. an int note needs
            # converting first.
            note = str(note)

        rd_bond = m.GetBondBetweenAtoms(*bond)
        if rd_bond is None:
            raise ValueError(
                f"Cannot annotate bond {bond}: no such bond in the molecule"
            )
        rd_bond.SetProp("bondNote", note)
        highlight_bonds.append(rd_bond.GetIdx())

    # set highlight color (light grey for every annotated bond)
    grey = (192 / 255, 192 / 255, 192 / 255)
    bond_colors = {b: grey for b in highlight_bonds}

    d = rdMolDraw2D.MolDraw2DCairo(400, 300)

    # smaller font size
    d.SetFontSize(0.8 * d.FontSize())

    rdMolDraw2D.PrepareAndDrawMolecule(
        d, m, highlightBonds=highlight_bonds, highlightBondColors=bond_colors
    )
    d.FinishDrawing()

    create_directory(filename)
    with open(to_path(filename), "wb") as f:
        f.write(d.GetDrawingText())
def draw(self, filename=None, show_atom_idx=False):
    """
    Draw the molecule.

    The drawing is prepared on a deep copy of `self.rdkit_mol` with 2D
    coordinates computed, so the stored molecule is left untouched.

    Args:
        filename (str): path to the save the generated image. If `None` the
            molecule is returned and can be viewed in Jupyter notebook.
        show_atom_idx (bool): if `True`, the atom map number of every atom in
            the copy is set to its index plus one.

    Returns:
        The prepared copy of the molecule if `filename` is `None`; otherwise
        nothing is returned and the image is written to `filename`.
    """
    mol_copy = copy.deepcopy(self.rdkit_mol)
    AllChem.Compute2DCoords(mol_copy)

    if show_atom_idx:
        for atom in mol_copy.GetAtoms():
            atom.SetAtomMapNum(atom.GetIdx() + 1)

    # Nothing to write: hand the prepared molecule back to the caller.
    if filename is None:
        return mol_copy

    create_directory(filename)
    Draw.MolToFile(mol_copy, str(to_path(filename)))
def write_group_isomorphic_to_file(self, filename):
    """
    Write statistics of the molecules to a text file.

    The molecules in `self.molecules` are grouped by graph isomorphism, and
    the following is written to `filename`: the number of molecules, the
    number of molecules per charge, the number of isomorphic groups, the
    number of groups per group size, the number of occurrences of every
    (sorted) pair of charges found within a group, and finally one line per
    group listing its members as `formula_id_charge`.

    Args:
        filename (str): path of the file to write.
    """

    def partition_by_isomorphism(molecules):
        """
        Partition molecules into groups of mutually isomorphic molecules.

        Each molecule is compared against the first member of every existing
        group and joins the first group that matches; if none matches, it
        starts a new group.

        Args:
            molecules: a list of Molecules.

        Returns:
            A list of list, with inner list of isomorphic molecules.
        """
        partitions = []
        for mol in molecules:
            for members in partitions:
                if mol.mol_graph.isomorphic_to(members[0].mol_graph):
                    members.append(mol)
                    break
            else:
                # No existing group matched.
                partitions.append([mol])
        return partitions

    def write_table(out, table):
        """Write one `key value` line per entry of `table`."""
        for key, count in table.items():
            out.write("{} {}\n".format(key, count))

    groups = partition_by_isomorphism(self.molecules)

    # number of molecules for each charge
    charge_counts = defaultdict(int)
    for mol in self.molecules:
        charge_counts[mol.charge] += 1

    # number of groups for each group size
    size_counts = defaultdict(int)
    for members in groups:
        size_counts[len(members)] += 1

    # number of occurrences of each pair of charges within a group
    pair_counts = defaultdict(int)
    for members in groups:
        member_charges = [mol.charge for mol in members]
        for pair in itertools.combinations(member_charges, 2):
            pair_counts[tuple(sorted(pair))] += 1

    create_directory(filename)
    with open(to_path(filename), "w") as out:
        out.write("Number of molecules: {}\n\n".format(len(self.molecules)))
        out.write("Molecule charge state statistics.\n")
        out.write("# charge state number of molecules:\n")
        write_table(out, charge_counts)

        out.write("Number of isomorphic groups: {}\n\n".format(len(groups)))
        out.write(
            "Molecule isomorphic group size statistics. (i.e. the number of "
            "isomorphic molecules that have a specific number of charge state\n"
        )
        out.write("# size number of molecules:\n")
        write_table(out, size_counts)

        out.write("# charge combinations number:\n")
        write_table(out, pair_counts)

        # one line per group, listing its members
        for members in groups:
            out.write(
                "".join(
                    "{}_{}_{} ".format(mol.formula, mol.id, mol.charge)
                    for mol in members
                )
            )
            out.write("\n")