def test3(self):
    """Check that none of the four checked IDs passes ``Resolution(34.99, 35.01)``.

    Filters ``self.pdb`` with that range, collects the remaining keys and
    asserts that '2ONX', '2OLX', '3REC' and '1LU3' are all absent.
    """
    pdb_3 = self.pdb.filter(Resolution(34.99, 35.01))
    results_3 = pdb_3.keys().collect()

    # assertNotIn reports the offending ID and the collected keys on failure,
    # whereas assertFalse(x in y) only reports "True is not false".
    self.assertNotIn('2ONX', results_3)
    self.assertNotIn('2OLX', results_3)
    self.assertNotIn('3REC', results_3)
    self.assertNotIn('1LU3', results_3)
def test2(self):
    """Check that only '2OLX' passes ``Resolution(1.41, 1.43)``.

    Filters ``self.pdb`` with that range, collects the remaining keys and
    asserts that '2OLX' is present while '2ONX', '3REC' and '1LU3' are absent.
    """
    pdb_2 = self.pdb.filter(Resolution(1.41, 1.43))
    results_2 = pdb_2.keys().collect()

    # assertIn / assertNotIn report the ID and the collected keys on failure,
    # whereas assertTrue/assertFalse(x in y) only report the boolean.
    self.assertNotIn('2ONX', results_2)
    self.assertIn('2OLX', results_2)
    self.assertNotIn('3REC', results_2)
    self.assertNotIn('1LU3', results_2)
def test1(self):
    """Check that only '2ONX' passes ``Resolution(1.51, 1.53)``.

    Filters ``self.pdb`` with that range, collects the remaining keys and
    asserts that '2ONX' is present while '2OLX', '3REC' and '1LU3' are absent.
    """
    pdb_1 = self.pdb.filter(Resolution(1.51, 1.53))
    results_1 = pdb_1.keys().collect()

    # assertIn / assertNotIn report the ID and the collected keys on failure,
    # whereas assertTrue/assertFalse(x in y) only report the boolean.
    self.assertIn('2ONX', results_1)
    self.assertNotIn('2OLX', results_1)
    self.assertNotIn('3REC', results_1)
    self.assertNotIn('1LU3', results_1)
# Location of the sequence file, plus the fraction and seed handed to the reader.
path = "../../resources/mmtf_full_sample/"
fraction = 0.5
seed = 123

pdb = mmtfReader.read_sequence_file(path, sc, fraction=fraction, seed=seed)

count = pdb.count()
print(f'number of pdb entries read : {count}')


# ## Retain high resolution X-ray structures

# In[4]:

# Apply the three filters one after another; this is the same sequence of
# calls as a single chained .filter(...).filter(...).filter(...) expression.
pdb = pdb.filter(ExperimentalMethods(ExperimentalMethods.X_RAY_DIFFRACTION))
pdb = pdb.filter(Resolution(0, 2.0))
pdb = pdb.filter(RFree(0, 2.0))

print(f'number of pdb entries left : {pdb.count()}')


# ## Visualize Structures

# In[5]:

# Collect the keys of the remaining entries and hand them to the viewer.
structures = pdb.keys().collect()
view_structure(structures)


# ## Save this subset in a Hadoop Sequence File

# In[7]:
# ## Read in MMTF Files

# In[3]:

path = "../../resources/mmtf_reduced_sample/"
pdb = mmtfReader.read_sequence_file(path, sc)


# ## Filter by resolution
#
# Entries without resolution values (e.g. NMR structures) will be filtered out

# In[4]:

# Build the filter first, then apply it to the dataset.
resolution_filter = Resolution(0.0, 2.0)
pdb = pdb.filter(resolution_filter)


# ## Count number of entries

# In[5]:

count = pdb.count()
print(f"Number of structures : {count}")


# ## Visualize Structures

# In[6]:

# Collect the keys of the remaining entries and hand them to the viewer.
structure_names = pdb.keys().collect()
view_structure(structure_names)
# In[2]:

# input parameters
# These values drive the filters configured below; change them here rather
# than editing the individual calls.
resolution = 2.0
minInteractions = 2
maxInteractions = 4
distanceCutoff = 3.0
bFactorCutoff = 1.645
# Not consumed in this section; presumably used further down — TODO confirm.
includeWaters = True


# ## Read PDB and filter by resolution and only include proteins

# In[3]:

# NOTE(review): `path` and `sc` are not defined in this section; they are
# expected to be set earlier in the script.
pdb = mmtfReader.read_sequence_file(path, sc)
pdb = pdb.filter(Resolution(minResolution=0.0, maxResolution=resolution)).filter(
    ContainsLProteinChain(exclusive=True))


# ## Set up criteria for water (HOH) interactions

# In[4]:

interactions_filter = InteractionFilter()
interactions_filter.set_distance_cutoff(distanceCutoff)
interactions_filter.set_normalized_b_factor_cutoff(bFactorCutoff)
interactions_filter.set_min_interactions(minInteractions)
interactions_filter.set_max_interactions(maxInteractions)
# The leading True is the include flag for the listed groups/elements; it is
# deliberately not tied to `includeWaters`.
interactions_filter.set_query_groups(True, ["HOH"])
interactions_filter.set_query_elements(True, "O")  # Only use water oxygen
interactions_filter.set_target_elements(True, ["O", "N", "S"])