Exemple #1
0
def do_atomconv_featurize(lig_files, rec_files, labels):
    '''
    Featurize ligand/receptor complexes with an ``AtomicConvFeaturizer``.

    The featurizer is configured with fixed atom-count limits (150 ligand
    atoms, 27000 protein atoms), a neighbor cutoff of 4, at most 4 neighbors
    and a batch size of 64.

    Parameters
    ----------
    lig_files: array_like
        n_examples list of ligand file names for training
    rec_files: array_like
        n_examples list of receptor file names for training
    labels: array_like
        n_examples list of labels, handed to the featurizer constructor

    Returns
    -------
    The value returned by ``featurize_complexes`` unchanged; according to
    the original documentation of this function that is:

    features: array_like
        n_examples X feature_dims
    failures: array_like
        list of example indices that failed to featurize
    '''
    ligand_atom_limit = 150  # for ligand atoms
    protein_atom_limit = 27000  # for protein atoms

    settings = {
        "labels": labels,
        "frag1_num_atoms": ligand_atom_limit,
        "frag2_num_atoms": protein_atom_limit,
        # The complex holds both fragments, so its limit is their sum.
        "complex_num_atoms": ligand_atom_limit + protein_atom_limit,
        "neighbor_cutoff": 4,
        "max_num_neighbors": 4,
        "batch_size": 64,
    }
    complex_featurizer = AtomicConvFeaturizer(**settings)

    print("Featurizing Complexes")
    result = complex_featurizer.featurize_complexes(lig_files, rec_files)
    return result
Exemple #2
0
    def test_feature_generation(self):
        """Test if featurization works using AtomicConvFeaturizer.

        Featurizes the same 3zso ligand/protein pair twice. The test passes
        when the call completes and its result unpacks into two values; no
        further checks are made on the features.
        """
        here = os.path.dirname(os.path.realpath(__file__))
        ligand_path = os.path.join(here, "data/3zso_ligand_hyd.pdb")
        protein_path = os.path.join(here, "data/3zso_protein.pdb")

        # Atom counts pulled from the PDB files. For larger datasets with
        # more PDBs, would use max num atoms instead of exact.
        n_ligand_atoms = 44
        n_protein_atoms = 2336

        featurizer_kwargs = {
            "labels": np.array([0, 0]),
            "batch_size": 1,
            "epochs": 1,
            "frag1_num_atoms": n_ligand_atoms,
            "frag2_num_atoms": n_protein_atoms,
            # 44 + 2336 = 2380 atoms in total
            "complex_num_atoms": n_ligand_atoms + n_protein_atoms,
            "max_num_neighbors": 4,
            # Cutoff in angstroms
            "neighbor_cutoff": 4,
        }
        featurizer = AtomicConvFeaturizer(**featurizer_kwargs)

        ligands = [ligand_path] * 2
        proteins = [protein_path] * 2
        features, _ = featurizer.featurize_complexes(ligands, proteins)
  def test_feature_generation(self):
    """Test if featurization works using AtomicConvFeaturizer.

    Runs the featurizer on two copies of the 3zso ligand/protein complex and
    only requires that the call succeeds and yields a two-element result.
    """
    test_dir = os.path.dirname(os.path.realpath(__file__))
    ligand_pdb = os.path.join(test_dir, "data/3zso_ligand_hyd.pdb")
    protein_pdb = os.path.join(test_dir, "data/3zso_protein.pdb")

    # Exact atom counts pulled from the PDB files. For larger datasets with
    # more PDBs, would use max num atoms instead of exact.
    ligand_atom_count = 44
    protein_atom_count = 2336
    total_atom_count = ligand_atom_count + protein_atom_count  # 2380

    neighbor_limit = 4
    cutoff_angstroms = 4
    dummy_labels = np.array([0, 0])

    featurizer = AtomicConvFeaturizer(
        labels=dummy_labels,
        batch_size=1,
        epochs=1,
        frag1_num_atoms=ligand_atom_count,
        frag2_num_atoms=protein_atom_count,
        complex_num_atoms=total_atom_count,
        max_num_neighbors=neighbor_limit,
        neighbor_cutoff=cutoff_angstroms)

    ligand_inputs = [ligand_pdb, ligand_pdb]
    protein_inputs = [protein_pdb, protein_pdb]
    features, _ = featurizer.featurize_complexes(ligand_inputs,
                                                 protein_inputs)