def _featurize_complexes(self, df, featurizer, parallel=True, worker_pool=None): """Generates circular fingerprints for dataset.""" protein_pdbs = list(df["protein_pdb"]) ligand_pdbs = list(df["ligand_pdb"]) complexes = zip(ligand_pdbs, protein_pdbs) def featurize_wrapper(ligand_protein_pdb_tuple): ligand_pdb, protein_pdb = ligand_protein_pdb_tuple print("Featurizing %s" % ligand_pdb[0:2]) molecule_features = featurizer.featurize_complexes([ligand_pdb], [protein_pdb]) return molecule_features if worker_pool is None: features = [] for ligand_protein_pdb_tuple in zip(ligand_pdbs, protein_pdbs): features.append(featurize_wrapper(ligand_protein_pdb_tuple)) else: if worker_pool is None: worker_pool = ProcessingPool(mp.cpu_count()) features = worker_pool.map(featurize_wrapper, zip(ligand_pdbs, protein_pdbs)) else: features = worker_pool.map_sync(featurize_wrapper, zip(ligand_pdbs, protein_pdbs)) #features = featurize_wrapper(zip(ligand_pdbs, protein_pdbs)) df[featurizer.__class__.__name__] = list(features)
def _featurize_compounds(self, df, featurizer, parallel=True, worker_pool=None):
    """Featurize individual compounds.

    Given a featurizer that operates on individual chemical compounds
    or macromolecules, compute & add features for each compound to the
    features dataframe.

    Args:
        df: Dataframe whose "smiles" column supports ``.tolist()``. It is
            modified in place.
        featurizer: Object exposing ``featurize(mols)``; it is called with
            a one-element list per compound.
        parallel: Unused; kept so existing callers keep working.
        worker_pool: Optional pool exposing ``map_sync(func, *sequences)``
            (presumably an IPython-parallel style view -- confirm against
            callers). When ``None`` the compounds are featurized serially
            in the current process.

    Returns:
        None. The features are stored in ``df[type(featurizer).__name__]``.
    """
    sample_smiles = df["smiles"].tolist()

    if worker_pool is None:
        features = []
        for ind, smiles in enumerate(sample_smiles):
            if ind % self.log_every_n == 0:
                log("Featurizing sample %d" % ind, self.verbose)
            mol = Chem.MolFromSmiles(smiles)
            features.append(featurizer.featurize([mol]))
    else:
        def featurize_wrapper(smiles, dilled_featurizer):
            """Featurize one SMILES string with a dill-serialized featurizer."""
            print("Featurizing %s" % smiles)
            mol = Chem.MolFromSmiles(smiles)
            # Rebuild the featurizer from its serialized form inside the
            # wrapper, so only bytes have to be handed to the pool.
            restored_featurizer = dill.loads(dilled_featurizer)
            return restored_featurizer.featurize([mol])

        dilled_featurizer = dill.dumps(featurizer)
        # The wrapper takes two positional arguments, so the serialized
        # featurizer is supplied as a second sequence, repeated once per
        # compound. Mapping over the SMILES alone would call the wrapper
        # without ``dilled_featurizer`` and fail.
        features = worker_pool.map_sync(
            featurize_wrapper,
            sample_smiles,
            [dilled_featurizer] * len(sample_smiles),
        )

    df[featurizer.__class__.__name__] = features
def _featurize_compounds(self, df, featurizer, parallel=True, worker_pool=None):
    """Featurize individual compounds.

    Given a featurizer that operates on individual chemical compounds
    or macromolecules, compute & add features for each compound to the
    features dataframe.

    Args:
        df: Dataframe whose "smiles" column supports ``.tolist()``. It is
            modified in place.
        featurizer: Object exposing ``featurize(mols)``; it is called with
            a one-element list per compound.
        parallel: Unused; kept so existing callers keep working.
        worker_pool: Optional pool exposing ``map_sync(func, *sequences)``
            (presumably an IPython-parallel style view -- confirm against
            callers). When ``None`` the compounds are featurized serially
            in the current process.

    Returns:
        None. The features are stored in ``df[type(featurizer).__name__]``.
    """
    sample_smiles = df["smiles"].tolist()

    if worker_pool is None:
        features = []
        for ind, smiles in enumerate(sample_smiles):
            if ind % self.log_every_n == 0:
                log("Featurizing sample %d" % ind, self.verbose)
            mol = Chem.MolFromSmiles(smiles)
            features.append(featurizer.featurize([mol]))
    else:
        def featurize_wrapper(smiles, dilled_featurizer):
            """Featurize one SMILES string with a dill-serialized featurizer."""
            print("Featurizing %s" % smiles)
            mol = Chem.MolFromSmiles(smiles)
            # Rebuild the featurizer from its serialized form inside the
            # wrapper, so only bytes have to be handed to the pool.
            restored_featurizer = dill.loads(dilled_featurizer)
            return restored_featurizer.featurize([mol])

        dilled_featurizer = dill.dumps(featurizer)
        # The wrapper takes two positional arguments, so the serialized
        # featurizer is supplied as a second sequence, repeated once per
        # compound. Mapping over the SMILES alone would call the wrapper
        # without ``dilled_featurizer`` and fail.
        features = worker_pool.map_sync(
            featurize_wrapper,
            sample_smiles,
            [dilled_featurizer] * len(sample_smiles),
        )

    df[featurizer.__class__.__name__] = features