def test2(self):
    """Check that PDB entry 5NV3 yields 12 protein dimers.

    The entry is downloaded, passed through ``StructureToBioassembly`` and
    then through ``StructureToProteinDimers(8, 20, False, True)``; the
    number of resulting records is compared against the expected count.
    """
    pdbIds = ["5NV3"]
    self.pdb = download_mmtf_files(pdbIds, self.sc)

    pdb_2 = (
        self.pdb
        .flatMap(StructureToBioassembly())
        .flatMap(StructureToProteinDimers(8, 20, False, True))
    )

    # assertEqual reports both values on failure, unlike assertTrue(a == b).
    self.assertEqual(pdb_2.count(), 12)
def test1(self):
    """Check that PDB entry 1I1G yields 4 protein dimers.

    The entry is downloaded, passed through ``StructureToBioassembly`` and
    then through ``StructureToProteinDimers(8, 20, False, True)``; the
    number of resulting records is compared against the expected count.
    """
    pdbIds = ["1I1G"]
    # The download helper is given the Spark context, as in every other
    # call to it in this file.
    self.pdb = download_mmtf_files(pdbIds, self.sc)

    pdb_1 = (
        self.pdb
        .flatMap(StructureToBioassembly())
        .flatMap(StructureToProteinDimers(8, 20, False, True))
    )

    # assertEqual reports both values on failure, unlike assertTrue(a == b).
    self.assertEqual(pdb_1.count(), 4)
def test4(self):
    """Check that PDB entry 1BZ5 yields 5 protein dimers.

    The entry is downloaded, passed through ``StructureToBioassembly`` and
    then through ``StructureToProteinDimers(9, 20, False, True)``; the
    number of resulting records is compared against the expected count.
    """
    pdbIds = ["1BZ5"]
    # Expected pairs (presumably chain identifiers of each dimer):
    # C5-B4
    # C6-B3
    # D7-A2
    # D8-A1
    # E10-E9
    self.pdb = download_mmtf_files(pdbIds, self.sc)

    pdb_4 = (
        self.pdb
        .flatMap(StructureToBioassembly())
        .flatMap(StructureToProteinDimers(9, 20, False, True))
    )

    # Assert on the RDD built in this test (pdb_4); the previous reference
    # to pdb_3 was an undefined name here.
    self.assertEqual(pdb_4.count(), 5)
def test3(self):
    """Check that PDB entry 4GIS yields 9 protein dimers.

    The entry is downloaded, passed through ``StructureToBioassembly`` and
    then through ``StructureToProteinDimers(8, 20, False, True)``; the
    number of resulting records is compared against the expected count.
    """
    pdbIds = ["4GIS"]
    # Expected pairs (presumably chain identifiers of each dimer):
    # A3-A2
    # A4-A1
    # B5-A1
    # B6-A2
    # B6-B5
    # B7-A3
    # B7-A4
    # B8-A4
    # B8-B7
    self.pdb = download_mmtf_files(pdbIds, self.sc)

    pdb_3 = (
        self.pdb
        .flatMap(StructureToBioassembly())
        .flatMap(StructureToProteinDimers(8, 20, False, True))
    )

    # assertEqual reports both values on failure, unlike assertTrue(a == b).
    self.assertEqual(pdb_3.count(), 9)
def test1(self):
    """Check that expanding ``self.pdb`` into bioassemblies gives 2 keys.

    ``self.pdb`` is flat-mapped through ``StructureToBioassembly`` and the
    keys of the resulting records are collected and counted.
    """
    pdb_1 = self.pdb.flatMap(StructureToBioassembly())
    results_1 = pdb_1.keys().collect()

    # assertEqual reports both values on failure, unlike assertTrue(a == b).
    self.assertEqual(len(results_1), 2)
# Download the 1STP structure using the existing Spark context `sc`.
protein = mmtfReader.download_mmtf_files(["1STP"], sc)


# ## Map protein structure to Bioassembly, then map to protein dimers

# In[4]:


# Arguments forwarded positionally to StructureToProteinDimers.
# NOTE(review): units/meaning are inferred from the names only -- confirm
# against the StructureToProteinDimers documentation.
cutoffDistance = 8.0
contacts = 20
useAllAtoms = False
exclusive = True

# The chain is parenthesised so it can span several lines without relying
# on a trailing backslash.
dimers = (
    protein
    .flatMap(StructureToBioassembly())
    .flatMap(StructureToProteinDimers(cutoffDistance, contacts, useAllAtoms, exclusive))
)


# ## Count number of structures

# In[5]:


print(f"Number of structures : {dimers.count()}")


# ## Terminate Spark

# In[6]: