def do_atomconv_featurize(lig_files, rec_files, labels):
    """Featurize ligand/receptor complexes with ``AtomicConvFeaturizer``.

    A featurizer is configured with fixed fragment sizes and neighbor-list
    settings, and its ``featurize_complexes`` result is returned unchanged.

    Parameters
    ----------
    lig_files: array_like
      n_examples list of ligand file names for training
    rec_files: array_like
      n_examples list of receptor file names for training
    labels: array_like
      n_examples list of labels

    Returns
    -------
    features: array_like
      n_examples X feature_dims
    failures: array_like
      list of example indices that failed to featurize
    """
    ligand_atom_count = 150  # for ligand atoms
    protein_atom_count = 27000  # for protein atoms

    # Collect every constructor argument in one place so the configuration
    # can be read at a glance; the complex size is the sum of both fragments.
    featurizer_settings = {
        "labels": labels,
        "frag1_num_atoms": ligand_atom_count,
        "frag2_num_atoms": protein_atom_count,
        "complex_num_atoms": ligand_atom_count + protein_atom_count,
        "neighbor_cutoff": 4,
        "max_num_neighbors": 4,
        "batch_size": 64,
    }
    complex_featurizer = AtomicConvFeaturizer(**featurizer_settings)

    print("Featurizing Complexes")
    return complex_featurizer.featurize_complexes(lig_files, rec_files)
def test_feature_generation(self):
    """Smoke-test AtomicConvFeaturizer on the bundled 3zso complex.

    The same ligand/protein pair is featurized twice. There are no explicit
    assertions: the test passes when featurization raises nothing and the
    result unpacks into exactly two values.
    """
    here = os.path.dirname(os.path.realpath(__file__))
    ligand_path = os.path.join(here, "data/3zso_ligand_hyd.pdb")
    protein_path = os.path.join(here, "data/3zso_protein.pdb")

    # Atom counts were pulled from the PDB files. For larger datasets with
    # more PDBs, the maximum atom count would be used instead of exact ones.
    n_ligand_atoms = 44  # for ligand atoms
    n_protein_atoms = 2336  # for protein atoms
    n_complex_atoms = n_ligand_atoms + n_protein_atoms  # 2380 in total

    dummy_labels = np.array([0, 0])
    settings = {
        "labels": dummy_labels,
        "batch_size": 1,
        "epochs": 1,
        "frag1_num_atoms": n_ligand_atoms,
        "frag2_num_atoms": n_protein_atoms,
        "complex_num_atoms": n_complex_atoms,
        "max_num_neighbors": 4,
        "neighbor_cutoff": 4,  # cutoff in angstroms
    }
    featurizer = AtomicConvFeaturizer(**settings)

    ligand_inputs = [ligand_path, ligand_path]
    protein_inputs = [protein_path, protein_path]
    features, _ = featurizer.featurize_complexes(ligand_inputs, protein_inputs)
def test_feature_generation(self):
    """Check that AtomicConvFeaturizer runs end to end on one complex.

    Featurizes the 3zso ligand/protein pair (duplicated to give two
    examples). Nothing is asserted about the features themselves; the test
    only requires that the call succeeds and returns a two-element result.
    """
    base_dir = os.path.dirname(os.path.realpath(__file__))
    ligand_pdb, protein_pdb = (
        os.path.join(base_dir, relative_path)
        for relative_path in ("data/3zso_ligand_hyd.pdb", "data/3zso_protein.pdb")
    )

    # Sizes pulled from the PDB files. For larger datasets with more PDBs,
    # the maximum number of atoms would be used instead of the exact counts.
    ligand_atoms, protein_atoms, total_atoms = 44, 2336, 2380
    neighbor_limit = 4
    cutoff_angstroms = 4  # cutoff in angstroms

    featurizer = AtomicConvFeaturizer(
        labels=np.array([0, 0]),
        batch_size=1,
        epochs=1,
        frag1_num_atoms=ligand_atoms,
        frag2_num_atoms=protein_atoms,
        complex_num_atoms=total_atoms,
        max_num_neighbors=neighbor_limit,
        neighbor_cutoff=cutoff_angstroms,
    )

    # Unpacking into two names keeps the implicit check that exactly two
    # values come back from featurization.
    result = featurizer.featurize_complexes([ligand_pdb] * 2, [protein_pdb] * 2)
    features, _ = result