def align(self, smiles):
    """Align the 2D depiction of every molecule to a common template.

    Arguments:
        smiles: the template. A string is parsed with ``Chem.MolFromSmiles``
            and passed through ``add_coords``; any other object is used
            directly as the template molecule.
            NOTE(review): a molecule passed in directly presumably has to
            carry 2D coordinates already -- confirm against the RDKit docs.

    Returns:
        None when ``self.inplace`` is set; otherwise the object returned by
        ``self.add_mols()``, after its molecules have been aligned.

    Raises:
        ValueError: if ``smiles`` is a string that RDKit cannot parse.
    """
    if isinstance(smiles, str):
        amol = Chem.MolFromSmiles(smiles)
        if amol is None:
            # MolFromSmiles signals a parse failure by returning None.
            raise ValueError("invalid template SMILES: {!r}".format(smiles))
        add_coords(amol)
    else:
        # Previously a non-string argument left the template unbound.
        amol = smiles

    if self.inplace:
        self.add_mols()
        frame = self
    else:
        frame = self.add_mols()

    # The depiction call is made for its side effect on each molecule, so a
    # plain loop is used instead of Series.apply with a discarded result.
    for mol in frame.data["Mol"]:
        if mol:  # skip rows that hold no molecule
            Chem.GenerateDepictionMatching2DStructure(mol, amol)

    return None if self.inplace else frame
def _apply(x):
    """Add coordinates to the molecule obtained from a single value.

    ``show_prog``, ``pb``, ``self`` and ``force`` are not defined here; they
    are presumably taken from the enclosing scope (not visible in this
    excerpt -- TODO confirm).
    """
    if show_prog:
        pb.inc()
    mol = self.mol_method(x)
    if not mol:
        # Nothing to do when no molecule could be obtained for this value.
        return
    add_coords(mol, force=force)
def read_sdf(fn, store_mol_as="Mol_b64", gen2d=False):
    """Create a MolFrame instance from one or more SD files.

    Arguments:
        fn: a file name, an already opened binary file object, or a list or
            tuple of these. File names ending in ``.gz`` are opened with
            gzip. File objects passed in by the caller are read but never
            closed here.
        store_mol_as: "Mol_b64" or "Smiles". Any other value falls back to
            "Mol_b64" after printing a notice.
        gen2d: when true, ``add_coords(mol, force=True)`` is called on every
            molecule before it is stored.

    Returns:
        A MolFrame whose ``data`` is a DataFrame with one row per readable
        record. The property columns are those of the first readable
        molecule (except ``store_mol_as`` and "order"); properties missing
        from later molecules become NaN, and properties that the first
        molecule did not have are dropped. The molecule itself is stored in
        the ``store_mol_as`` column.

    Records that RDKit cannot read are skipped; their running record number
    is printed to stderr.
    """
    if store_mol_as not in ("Mol_b64", "Smiles"):
        print("* Mols are stored as Mol_b64")
        store_mol_as = "Mol_b64"
    if not isinstance(fn, (list, tuple)):
        fn = [fn]

    d = {}
    d_keys = set()
    ctr = 0
    first_mol = True
    for f in fn:
        # Only files opened here are closed here.
        do_close = isinstance(f, str)
        if do_close:
            if f.endswith(".gz"):
                file_obj = gzip.open(f, mode="rb")
            else:
                file_obj = open(f, "rb")
        else:
            file_obj = f
        try:
            reader = Chem.ForwardSDMolSupplier(file_obj)
            for mol in reader:
                ctr += 1
                if not mol:
                    print(ctr, file=sys.stderr)
                    continue
                if first_mol:
                    # The first readable molecule defines the property
                    # columns; dict insertion order keeps the SD file order.
                    first_mol = False
                    for prop in mol.GetPropNames():
                        if prop in (store_mol_as, "order"):
                            continue
                        d[prop] = []
                    d_keys = set(d)
                    d[store_mol_as] = []
                mol_props = set()
                for prop in mol.GetPropNames():
                    if prop in d_keys:
                        mol_props.add(prop)
                        d[prop].append(get_value(mol.GetProp(prop)))
                    # Properties are cleared from the molecule before it is
                    # stored; kept values live in their own columns.
                    mol.ClearProp(prop)

                # append NAN to the missing props that were not in the mol:
                for prop in d_keys - mol_props:
                    d[prop].append(np.nan)
                if gen2d:
                    add_coords(mol, force=True)
                if store_mol_as == "Smiles":
                    d["Smiles"].append(Chem.MolToSmiles(mol))
                else:
                    mol_b64 = b64.b64encode(pickle.dumps(mol)).decode()
                    d["Mol_b64"].append(mol_b64)
        finally:
            # Close even when reading or value conversion raises.
            if do_close:
                file_obj.close()

    result = MolFrame()
    result.data = pd.DataFrame(d)
    print_log(result.data, "read SDF")
    return result