def test_no_similarity(self):
    """Two different active sites should be eight edit operations apart."""
    site_a = io.read_active_site(os.path.join("data", "276.pdb"))
    site_b = io.read_active_site(os.path.join("data", "4629.pdb"))
    str_a = cluster.convert_to_aa_str(site_a.residues)
    str_b = cluster.convert_to_aa_str(site_b.residues)
    # Eight edit operations separate the two amino-acid strings.
    self.assertEqual(cluster.compute_similarity(str_a, str_b), 8)
def test_identical_similarity(self):
    """An active site compared against itself has edit distance zero."""
    path = os.path.join("data", "276.pdb")
    site_a = io.read_active_site(path)
    site_b = io.read_active_site(path)
    str_a = cluster.convert_to_aa_str(site_a.residues)
    str_b = cluster.convert_to_aa_str(site_b.residues)
    # Identical strings require no edit operations.
    self.assertEqual(cluster.compute_similarity(str_a, str_b), 0)
def test_residues(filename, names, numbers):
    """Check that *filename* parses into residues with the given types and numbers."""
    site = io.read_active_site(os.path.join("data", filename))
    assert [res.type for res in site.residues] == names
    assert [res.number for res in site.residues] == numbers
def test_atoms(filename, residue_number, atoms, xs, ys, zs):
    """Check atom types and coordinates of one residue parsed from *filename*."""
    site = io.read_active_site(os.path.join("data", filename))
    res = site.residues[residue_number]
    assert [atom.type for atom in res.atoms] == atoms
    # Coordinates arrive as parallel x/y/z lists; zip them into (x, y, z) triples.
    assert [atom.coords for atom in res.atoms] == list(zip(xs, ys, zs))
def test_partition_clustering_one(self):
    """With k=1 every active site lands in a single cluster."""
    # Tractable subset of the data set.
    sites = [
        io.read_active_site(os.path.join("data", "%i.pdb" % pdb_id))
        for pdb_id in (276, 4629, 10701)
    ]
    clusters = cluster.cluster_by_partitioning(sites, 1)[0]
    self.assertClustersEqual(clusters, [["276", "10701", "4629"]])
def test_partition_clustering_individual(self):
    """With k equal to the number of sites, each site gets its own cluster."""
    # Tractable subset of the data set.
    pdb_ids = [276, 4629, 10701]
    sites = [
        io.read_active_site(os.path.join("data", "%i.pdb" % pdb_id))
        for pdb_id in pdb_ids
    ]
    clusters = cluster.cluster_by_partitioning(sites, len(pdb_ids))[0]
    self.assertClustersEqual(clusters, [["276"], ["10701"], ["4629"]])
def test_hierarchical_clustering_two(self):
    """Hierarchical clustering with k=2 on a tractable subset."""
    sites = [
        io.read_active_site(os.path.join("data", "%i.pdb" % pdb_id))
        for pdb_id in (37438, 55996, 81859)
    ]
    clusters = cluster.cluster_hierarchically(sites, 2)[0]
    # 37438 should be grouped with 55996, leaving 81859 on its own.
    self.assertClustersEqual(clusters, [["37438", "55996"], ["81859"]])
def test_partition_clustering_two(self):
    """Partition clustering with k=2 on a tractable subset.

    The partitioning algorithm is randomized, so a single run can land
    in a bad local optimum; retry up to four times before failing.
    (Normally the ``flaky`` plugin would be used for this.)
    """
    pdb_ids = [37438, 55996, 81859]
    # Loading the sites is deterministic — do it once, not once per attempt.
    active_sites = [
        io.read_active_site(os.path.join("data", "%i.pdb" % pdb_id))
        for pdb_id in pdb_ids
    ]
    for _ in range(4):
        try:
            clusters = cluster.cluster_by_partitioning(active_sites, 2)[0]
            # 37438 should end up with 55996, leaving 81859 on its own.
            self.assertClustersEqual(clusters, [["37438", "55996"], ["81859"]])
            break
        except AssertionError:
            pass  # flaky run — try again
    else:
        raise AssertionError("Partitioning failed on all 4 attempts")