Esempio n. 1
0
    def test1(self):
        """Structure 1I1G should yield exactly 4 protein dimers."""
        pdbIds = ["1I1G"]
        # NOTE(review): the sibling tests pass self.sc as a second argument to
        # download_mmtf_files; confirm which signature the installed version
        # expects before changing this call.
        self.pdb = download_mmtf_files(pdbIds)

        # Expand each structure to its bioassemblies, then split them into
        # dimers. Positional arguments to StructureToProteinDimers are
        # (cutoff distance, contacts, use all atoms, exclusive).
        bioassemblies = self.pdb.flatMap(StructureToBioassembly())
        pdb_1 = bioassemblies.flatMap(StructureToProteinDimers(8, 20, False, True))

        # assertEqual reports the actual count on failure, unlike
        # assertTrue(a == b), which only says "False is not true".
        self.assertEqual(pdb_1.count(), 4)
Esempio n. 2
0
    def test2(self):
        """Structure 5NV3 should yield exactly 12 protein dimers."""
        pdbIds = ["5NV3"]
        self.pdb = download_mmtf_files(pdbIds, self.sc)

        # Expand each structure to its bioassemblies, then split them into
        # dimers. Positional arguments to StructureToProteinDimers are
        # (cutoff distance, contacts, use all atoms, exclusive).
        bioassemblies = self.pdb.flatMap(StructureToBioassembly())
        pdb_2 = bioassemblies.flatMap(StructureToProteinDimers(8, 20, False, True))

        # assertEqual reports the actual count on failure, unlike
        # assertTrue(a == b), which only says "False is not true".
        self.assertEqual(pdb_2.count(), 12)
Esempio n. 3
0
    def test4(self):
        """Structure 1BZ5 should yield exactly 5 protein dimers.

        Uses a cutoff distance of 9 rather than the 8 used by the other
        tests in this group.
        """
        pdbIds = ["1BZ5"]
        # Presumably the five expected dimer pairs (chain/instance labels):
        # C5-B4
        # C6-B3
        # D7-A2
        # D8-A1
        # E10-E9
        self.pdb = download_mmtf_files(pdbIds, self.sc)

        # Expand each structure to its bioassemblies, then split them into
        # dimers. Positional arguments to StructureToProteinDimers are
        # (cutoff distance, contacts, use all atoms, exclusive).
        bioassemblies = self.pdb.flatMap(StructureToBioassembly())
        pdb_4 = bioassemblies.flatMap(StructureToProteinDimers(9, 20, False, True))

        # The original asserted on `pdb_3`, which is not defined in this
        # method, so the test always raised NameError instead of checking
        # the result.
        self.assertEqual(pdb_4.count(), 5)
Esempio n. 4
0
    def test3(self):
        """Structure 4GIS should yield exactly 9 protein dimers."""
        pdbIds = ["4GIS"]
        # Presumably the nine expected dimer pairs (chain/instance labels):
        # A3-A2
        # A4-A1
        # B5-A1
        # B6-A2
        # B6-B5
        # B7-A3
        # B7-A4
        # B8-A4
        # B8-B7
        self.pdb = download_mmtf_files(pdbIds, self.sc)

        # Expand each structure to its bioassemblies, then split them into
        # dimers. Positional arguments to StructureToProteinDimers are
        # (cutoff distance, contacts, use all atoms, exclusive).
        bioassemblies = self.pdb.flatMap(StructureToBioassembly())
        pdb_3 = bioassemblies.flatMap(StructureToProteinDimers(8, 20, False, True))

        # assertEqual reports the actual count on failure, unlike
        # assertTrue(a == b), which only says "False is not true".
        self.assertEqual(pdb_3.count(), 9)
Esempio n. 5
0

# Download the structure with PDB ID 1STP using the existing Spark context.
protein = mmtfReader.download_mmtf_files(["1STP"], sc)


# ## Map protein structure to Bioassembly, then map to protein dimers

# In[4]:


# Settings passed positionally to StructureToProteinDimers below.
cutoffDistance, contacts = 8.0, 20
useAllAtoms, exclusive = False, True

# First expand each structure into its bioassemblies, then split every
# bioassembly into protein dimers.
dimers = (
    protein
    .flatMap(StructureToBioassembly())
    .flatMap(
        StructureToProteinDimers(cutoffDistance, contacts, useAllAtoms, exclusive)
    )
)


# ## Count number of structures

# In[5]:


# count() is evaluated while the f-string is formatted.
print(f"Number of structures : {dimers.count()}")


# ## Terminate Spark

# In[6]: