def test3(self):
        """Resolution(34.99, 35.01) must retain none of the four sample entries."""
        filtered = self.pdb.filter(Resolution(34.99, 35.01))
        kept_ids = filtered.keys().collect()

        # assertNotIn reports the offending id and the collection on failure,
        # unlike assertFalse(x in y) which only says "True is not false".
        self.assertNotIn('2ONX', kept_ids)
        self.assertNotIn('2OLX', kept_ids)
        self.assertNotIn('3REC', kept_ids)
        self.assertNotIn('1LU3', kept_ids)
    def test2(self):
        """Resolution(1.41, 1.43) must retain 2OLX and drop the other three sample entries."""
        filtered = self.pdb.filter(Resolution(1.41, 1.43))
        kept_ids = filtered.keys().collect()

        # assertIn/assertNotIn report the id and the collection on failure,
        # unlike assertTrue/assertFalse applied to an `in` expression.
        self.assertNotIn('2ONX', kept_ids)
        self.assertIn('2OLX', kept_ids)
        self.assertNotIn('3REC', kept_ids)
        self.assertNotIn('1LU3', kept_ids)
    def test1(self):
        """Resolution(1.51, 1.53) must retain 2ONX and drop the other three sample entries."""
        filtered = self.pdb.filter(Resolution(1.51, 1.53))
        kept_ids = filtered.keys().collect()

        # assertIn/assertNotIn report the id and the collection on failure,
        # unlike assertTrue/assertFalse applied to an `in` expression.
        self.assertIn('2ONX', kept_ids)
        self.assertNotIn('2OLX', kept_ids)
        self.assertNotIn('3REC', kept_ids)
        self.assertNotIn('1LU3', kept_ids)
# Input location plus the fraction/seed values forwarded to the reader
# (presumably a reproducible random subsample -- confirm against the
# mmtfReader documentation).
path = "../../resources/mmtf_full_sample/"
fraction = 0.5
seed = 123

pdb = mmtfReader.read_sequence_file(path, sc, fraction=fraction, seed=seed)

count = pdb.count()
print(f'number of pdb entries read : {count}')

# ## Retain high resolution X-ray structures

# In[4]:

# The three filters are applied one after another, in the same order as a
# single chained expression would apply them.
pdb = pdb.filter(ExperimentalMethods(ExperimentalMethods.X_RAY_DIFFRACTION))
pdb = pdb.filter(Resolution(minResolution=0, maxResolution=2.0))
pdb = pdb.filter(RFree(0, 2.0))

print(f'number of pdb entries left : {pdb.count()}')

# ## Visualize Structures

# In[5]:

# Collect the keys of the remaining entries and hand them to the viewer.
structures = pdb.keys().collect()
view_structure(structures)

# ## Save this subset in a Hadoop Sequence File

# In[7]:
# ## Read in MMTF Files

# In[3]:

path = "../../resources/mmtf_reduced_sample/"

pdb = mmtfReader.read_sequence_file(path, sc)

# ## Filter by resolution
#
# Entries without resolution values (e.g. NMR structures) will be filtered out

# In[4]:

pdb = pdb.filter(Resolution(minResolution=0.0, maxResolution=2.0))

# ## Count number of entries

# In[5]:

count = pdb.count()

print(f"Number of structures  : {count}")

# ## Visualize Structures

# In[6]:

# Collect the keys of the remaining entries and hand them to the viewer.
structure_names = pdb.keys().collect()
view_structure(structure_names)
Exemple #6
0
# In[2]:

# input parameters
# These values feed the resolution filter and the InteractionFilter set up
# below; change them here rather than editing the individual calls.
resolution = 2.0
minInteractions = 2
maxInteractions = 4
distanceCutoff = 3.0
bFactorCutoff = 1.645
# NOTE(review): includeWaters is not consumed by any statement visible in
# this section -- confirm whether later code relies on it.
includeWaters = True

# ## Read PDB and filter by resolution and only include proteins

# In[3]:

pdb = mmtfReader.read_sequence_file(path, sc)
pdb = pdb.filter(Resolution(minResolution=0.0, maxResolution=resolution)).filter(
    ContainsLProteinChain(exclusive=True))

# ## Set up criteria for water interactions

# In[4]:

interactions_filter = InteractionFilter()
interactions_filter.set_distance_cutoff(distanceCutoff)
interactions_filter.set_normalized_b_factor_cutoff(bFactorCutoff)
interactions_filter.set_min_interactions(minInteractions)
interactions_filter.set_max_interactions(maxInteractions)
interactions_filter.set_query_groups(True, ["HOH"])
interactions_filter.set_query_elements(True, "O")  # Only use water oxygen
interactions_filter.set_target_elements(True, ["O", "N", "S"])