예제 #1
0
    def setUpClass(self):
        """Prepare the writer, the parsers and the fixture paths for the tests.

        Stores an ``MMCIFIO`` writer, an ``MMCIFParser`` and a ``PDBParser``
        on the receiver, parses ``PDB/1A8O.pdb`` once into ``structure`` and
        records the paths of the other fixture files.
        """
        self.io = MMCIFIO()
        self.mmcif_parser = MMCIFParser()
        self.pdb_parser = PDBParser()

        # Only the parse itself needs the construction warnings silenced.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", PDBConstructionWarning)
            parsed = self.pdb_parser.get_structure("example", "PDB/1A8O.pdb")
            self.structure = parsed

        # Fixture locations, relative to the test working directory.
        self.mmcif_file = "PDB/1A8O.cif"
        self.mmcif_multimodel_pdb_file = "PDB/1SSU_mod.pdb"
        self.mmcif_multimodel_mmcif_file = "PDB/1SSU_mod.cif"
예제 #2
0
    def _writeLowLevel(self, fileName, dict):
        """ write a dictionary as cif file
        """

        if fileName.endswith(".pdb"):
            print("Low level access to PDB is not implemented")
        else:
            if self.ioCIF is None:
                self.ioCIF = MMCIFIO()
            io = self.ioCIF
        io.set_dict(dict)
        io.save(fileName)
예제 #3
0
 def test_mmcifio_multimodel(self):
     """Write a multi-model, multi-chain mmCIF file.

     The same structure is loaded once from PDB and once from mmCIF; each
     is written with the shared writer, read back, and checked for model
     count, chain count and one reference coordinate.
     """
     pdb_struct = self.pdb_parser.get_structure(
         "1SSU_mod_pdb", self.mmcif_multimodel_pdb_file)
     mmcif_struct = self.mmcif_parser.get_structure(
         "1SSU_mod_mmcif", self.mmcif_multimodel_mmcif_file)
     # The previously created local ``io = MMCIFIO()`` was never used; all
     # writing goes through ``self.io``.
     for struct in (pdb_struct, mmcif_struct):
         self.io.set_structure(struct)
         # mkstemp returns an open descriptor; close it so the writer can
         # reopen the path by name.
         filenumber, filename = tempfile.mkstemp()
         os.close(filenumber)
         try:
             self.io.save(filename)
             struct_in = self.mmcif_parser.get_structure(
                 "1SSU_mod_in", filename)
             self.assertEqual(len(struct_in), 2)
             self.assertEqual(len(struct_in[1]), 2)
             self.assertEqual(
                 round(float(struct_in[1]["B"][1]["N"].get_coord()[0]), 3),
                 6.259)
         finally:
             os.remove(filename)
예제 #4
0
def visualize_2DA(apo_2DA, holo_2DA, paper_apo_spans):
    """Write the superimposed holo structure to a file and print a Pymol script.

    The holo model is superimposed onto the apo model using the first domain
    and saved as mmCIF under ``OUTPUT_DIR``. The printed script can be pasted
    directly into Pymol and will:

    1) load both structures (superimposed holo from the filesystem, apo via
       ``fetch``)
    2) create selections for the domains and the two-domain arrangements
    3) color the selections by domain, apo/holo and paper/ours
       - ours are more saturated, the paper's are faded
           - red, yellow: apo (first and second domain respectively)
           - green, blue: holo
    4) provide example usage in the last script paragraph

    Args:
        apo_2DA: Apo two-domain arrangement; ``pdb_code``, ``d1`` and ``d2``
            are read from it.
        holo_2DA: Holo two-domain arrangement; the same attributes are read.
        paper_apo_spans: ``[d1_spans, d2_spans]``, each a sequence of
            two-element (from, to) spans that are translated with
            ``find_label_seq`` of the apo chain mapping.
    """

    # load the structures
    a = parse_mmcif(apo_2DA.pdb_code)
    h = parse_mmcif(holo_2DA.pdb_code)
    apo = a.structure
    holo = h.structure

    # --- begin: taken over from main ---
    apo_mapping = a.bio_to_mmcif_mappings[0][apo_2DA.d1.chain_id]
    holo_mapping = h.bio_to_mmcif_mappings[0][holo_2DA.d1.chain_id]

    # crop polypeptides to longest common substring
    c1_common_seq, c2_common_seq = get_longest_common_polypeptide(
        a.poly_seqs[apo_mapping.entity_poly_id],
        h.poly_seqs[holo_mapping.entity_poly_id],
    )
    c1_label_seq_ids = list(c1_common_seq.keys())
    c2_label_seq_ids = list(c2_common_seq.keys())

    # Shift between holo and apo label_seq ids; computed once and reused for
    # both paper domains below (it used to be recomputed identically later).
    label_seq_id_offset = c2_label_seq_ids[0] - c1_label_seq_ids[0]
    # --- end: taken over from main ---

    # get residues of the first domain, in both apo and holo structures
    apo_d1 = DomainResidues.from_domain(apo_2DA.d1, apo[0], apo_mapping)
    holo_d1 = DomainResidues.from_domain(holo_2DA.d1, holo[0], holo_mapping)
    # superimpose holo onto apo, using the first domain
    superimposed_holo_model = superimpose_structure(holo[0], holo_d1, apo_d1)
    # save the structure
    name = holo.id + f'_{holo_d1.domain_id}onto_{apo_d1.domain_id}'
    io = MMCIFIO()
    superimposed_holo = Structure(name)
    superimposed_holo.add(superimposed_holo_model)
    io.set_structure(superimposed_holo)
    sholo_file_path = Path(OUTPUT_DIR, name + '.cif')
    io.save(str(sholo_file_path), preserve_atom_numbering=True)

    def get_resi_selection(spans):
        """Return a parenthesized Pymol ``resi`` selection covering all spans."""
        return '(' + ' or '.join(f'resi {from_}-{to}' for from_, to in spans) + ')'

    # convert paper spans to label seqs, so we can show them in Pymol
    def get_paper_domain(d: DomainResidueMapping, paper_spans, residue_id_mapping):
        """Translate the paper's spans to label seq ids and wrap them in a domain."""
        span_array = np.array(paper_spans)  # column 0: beginnings, column 1: ends
        segment_beginnings = list(map(residue_id_mapping.find_label_seq, span_array[:, 0].tolist()))
        segment_ends = list(map(residue_id_mapping.find_label_seq, span_array[:, 1].tolist()))
        logger.debug(segment_beginnings)
        logger.debug(segment_ends)
        return DomainResidueMapping(d.domain_id, d.chain_id, segment_beginnings, segment_ends)

    logger.debug(paper_apo_spans)  # [d1, d2] where d1 [(), (),...]
    paper_apo_drm1 = get_paper_domain(apo_2DA.d1, paper_apo_spans[0], apo_mapping)
    paper_apo_drm2 = get_paper_domain(apo_2DA.d2, paper_apo_spans[1], apo_mapping)
    paper_holo_drm1 = DomainResidueMapping.from_domain_on_another_chain(paper_apo_drm1, holo_d1.chain_id, label_seq_id_offset)
    paper_holo_drm2 = DomainResidueMapping.from_domain_on_another_chain(paper_apo_drm2, holo_d1.chain_id, label_seq_id_offset)  # same chain, for now, as in d1

    # create highlight script (by the spans, or just create multiple selections)
    # copy the 2 structures to 4 (paper spans vs our spans), so we can color them differently
    # select only the domains (2), and make only them visible

    sholo = superimposed_holo

    pymol_script = f"""
fetch {apo.id}
load {sholo_file_path.absolute()}

sele apo_d1, {apo.id} and chain {apo_2DA.d1.chain_id} and {get_resi_selection(apo_2DA.d1.get_spans())}
sele apo_d2, {apo.id} and chain {apo_2DA.d2.chain_id} and {get_resi_selection(apo_2DA.d2.get_spans())}
sele apo_2DA, apo_d1 or apo_d2

sele holo_d1, {sholo.id} and chain {holo_2DA.d1.chain_id} and {get_resi_selection(holo_2DA.d1.get_spans())}
sele holo_d2, {sholo.id} and chain {holo_2DA.d2.chain_id} and {get_resi_selection(holo_2DA.d2.get_spans())}
sele holo_2DA, holo_d1 or holo_d2

# copy objects, so we can color them differently
copy paper_{apo.id}, {apo.id}
copy paper_{sholo.id}, {sholo.id}

sele paper_apo_d1, paper_{apo.id} and chain {apo_2DA.d1.chain_id} and {get_resi_selection(paper_apo_drm1.get_spans())}
sele paper_apo_d2, paper_{apo.id} and chain {apo_2DA.d2.chain_id} and {get_resi_selection(paper_apo_drm2.get_spans())}
sele paper_apo_2DA, paper_apo_d1 or paper_apo_d2

sele paper_holo_d1, paper_{sholo.id} and chain {holo_2DA.d1.chain_id} and {get_resi_selection(paper_holo_drm1.get_spans())}
sele paper_holo_d2, paper_{sholo.id} and chain {holo_2DA.d2.chain_id} and {get_resi_selection(paper_holo_drm2.get_spans())}
sele paper_holo_2DA, paper_holo_d1 or paper_holo_d2

color red, apo_d1
color yellow, apo_d2
color green, holo_d1
color blue, holo_d2

color salmon, paper_apo_d1
color paleyellow, paper_apo_d2
color palegreen, paper_holo_d1
color lightblue, paper_holo_d2

# example usage: 
hide; show surface, apo_2DA
hide; show surface, paper_apo_2DA
hide; show surface, holo_2DA
hide; show surface, paper_holo_2DA

hide; show surface, apo_2DA or holo_2DA or paper_apo_2DA or paper_holo_2DA
    """

    print(pymol_script)
예제 #5
0
from Bio.PDB import PDBParser, MMCIFIO
from Bio.PDB import PDBIO, MMCIFParser

test_structures = ['1r70', '1zbl', '1zir', '3wu2']

# One row per round-trip: (parser class, input extension, writer class,
# output file name template). The rows run in the order listed, each over
# all test structures.
# NOTE: the pdb->pdb template has no underscore before the id, unlike the
# others; it is kept as-is so existing output names do not change.
conversions = [
    (PDBParser, "pdb", MMCIFIO, "pdb2cif_{}.cif"),
    (PDBParser, "pdb", PDBIO, "pdb2pdb{}.pdb"),
    (MMCIFParser, "cif", MMCIFIO, "cif2cif_{}.cif"),
    # This conversion used to parse and call set_structure without ever
    # saving; the output name follows the pattern of the rows above.
    (MMCIFParser, "cif", PDBIO, "cif2pdb_{}.pdb"),
]

for parser_cls, in_ext, writer_cls, out_template in conversions:
    p = parser_cls()
    for structure in test_structures:
        struc = p.get_structure("", f"../data/{structure}.{in_ext}")
        io = writer_cls()
        io.set_structure(struc)
        io.save(out_template.format(structure))
예제 #6
0
def main():
    """Benchmark parsing and writing of every ``*.gz`` file under a folder.

    For each input file the structure is parsed, summarized, written to a
    scratch file (``io.temp`` in the working directory), parsed again and
    the two summaries are compared. On success an ``.xml`` file with the
    timings and summary is written next to the input; on failure a
    ``.failed`` file with the error and traceback is written instead.

    Unless ``--no-continue`` is given, inputs whose stem matches an existing
    ``.xml`` result are skipped, so an interrupted run can be resumed.
    """

    ap = argparse.ArgumentParser(description=__doc__)
    ap.add_argument('infmt',
                    choices=['pdb', 'mmcif'],
                    help='File format of input files.')
    ap.add_argument('folder',
                    type=pathlib.Path,
                    help='Top-level folder with input files')
    ap.add_argument('--no-continue',
                    action='store_true',
                    default=False,
                    help='Parses all input files, ignoring existing results.')
    ap.add_argument('--strict',
                    action='store_true',
                    default=False,
                    help='Parse with PDBParser PERMISSIVE=0')
    args = ap.parse_args()

    # Setup logging
    setup_logging()

    permissive_bool = not args.strict

    # argparse `choices` guarantees infmt is one of these two values.
    if args.infmt == 'pdb':
        parser = PDBParser(PERMISSIVE=permissive_bool, QUIET=1)
        writer = PDBIO()
    elif args.infmt == 'mmcif':
        parser = MMCIFParser(QUIET=1)
        writer = MMCIFIO()

    flist = sorted(args.folder.rglob('*.gz'))

    xmllist = sorted(args.folder.rglob('*.xml'))
    if not args.no_continue and xmllist:
        logging.info(f'Found {len(xmllist)} existing result files')
        # Inputs and results are matched by stem (name minus last suffix).
        done = {f.stem for f in xmllist}
        fset = {f.stem: f for f in flist}
        remainder = fset.keys() - done
        logging.info(f'Resuming benchmark: {len(remainder)} files left')
        flist = sorted(fset[f] for f in remainder)
    else:
        logging.info(f'Found {len(flist)} files')

    n_digits = len(str(len(flist)))  # for fmting

    for idx, fpath in enumerate(flist, start=1):
        # Bind a default up front: the `finally` block logs `status`, and it
        # used to be unbound when the success branch itself raised or the
        # run was interrupted, hiding the real error behind a NameError.
        status = 'failed'
        try:
            # Parse
            with gzip.open(fpath, mode='rt') as handle:
                t0 = time.time()
                s = parser.get_structure(fpath.name, handle)
                t1 = time.time()

                read_time = t1 - t0

                data = summarize_structure(s)

            # Write
            writer.set_structure(s)
            t0 = time.time()
            writer.save('io.temp')
            t1 = time.time()
            write_time = t1 - t0

            # Round-trip
            s2 = parser.get_structure('new', 'io.temp')
            data2 = summarize_structure(s2)

            # Raised explicitly so the check also runs under `python -O`.
            if not data == data2:
                raise AssertionError(f'Summaries differ: {data} != {data2}')

            test_element_assignment(s)  # raises assert if failed

        except Exception as err:
            with fpath.with_suffix('.failed').open('w') as f:
                print(err, file=f)
                print(traceback.format_exc(), file=f)

            status = 'failed'

        else:

            # Write XML file with numbers
            root = Element('structure')
            root.set('path', fpath.name)
            root.set('parse_time', f'{read_time:5.3f}')
            root.set('write_time', f'{write_time:5.3f}')

            for key, value in data.items():
                child = SubElement(root, key)
                child.text = str(value)

            # Reparse for pretty print
            xml = minidom.parseString(tostring(root, 'utf-8'))

            # Write to file
            with fpath.with_suffix('.xml').open('w') as f:
                f.write(xml.toprettyxml(indent='  '))

            # Clear XML memory
            root.clear()
            xml.unlink()
            del root, xml

            status = 'ok'

        finally:
            # Best-effort cleanup: the scratch file is absent when parsing
            # failed before anything was written.
            try:
                os.remove('io.temp')
            except OSError:
                pass

            memusage = psutil.virtual_memory().percent

            logging.info(
                f'{idx:>{n_digits}d}/{len(flist)} {fpath.parent.name}/{fpath.name}: {status} | mem% = {memusage}',
            )  # to check for leaks
예제 #7
0
class WriteTest(unittest.TestCase):
    """Round-trip tests for MMCIFIO: write to a temp file and parse it back."""

    @classmethod
    def setUpClass(cls):
        """Create the shared writer, parsers, parsed structure and file paths."""
        cls.io = MMCIFIO()
        cls.mmcif_parser = MMCIFParser()
        cls.pdb_parser = PDBParser()

        with warnings.catch_warnings():
            warnings.simplefilter("ignore", PDBConstructionWarning)
            cls.structure = cls.pdb_parser.get_structure(
                "example", "PDB/1A8O.pdb")
            cls.mmcif_file = "PDB/1A8O.cif"
            cls.mmcif_multimodel_pdb_file = "PDB/1SSU_mod.pdb"
            cls.mmcif_multimodel_mmcif_file = "PDB/1SSU_mod.cif"

    def test_mmcifio_write_structure(self):
        """Write a full structure using MMCIFIO."""
        struct1 = self.structure
        # Write full model to temp file
        self.io.set_structure(struct1)
        filenumber, filename = tempfile.mkstemp()
        os.close(filenumber)
        try:
            self.io.save(filename)
            struct2 = self.mmcif_parser.get_structure("1a8o", filename)
            nresidues = len(list(struct2.get_residues()))
            self.assertEqual(len(struct2), 1)
            self.assertEqual(nresidues, 158)
        finally:
            os.remove(filename)

    def test_mmcifio_write_residue(self):
        """Write a single residue using MMCIFIO."""
        struct1 = self.structure
        residue1 = list(struct1.get_residues())[0]
        # Write the single residue to temp file
        self.io.set_structure(residue1)
        filenumber, filename = tempfile.mkstemp()
        os.close(filenumber)
        try:
            self.io.save(filename)
            struct2 = self.mmcif_parser.get_structure("1a8o", filename)
            nresidues = len(list(struct2.get_residues()))
            self.assertEqual(nresidues, 1)
        finally:
            os.remove(filename)

    def test_mmcifio_write_residue_w_chain(self):
        """Write a single residue (chain id == X) using MMCIFIO."""
        struct1 = self.structure.copy()  # make copy so we can change it
        residue1 = list(struct1.get_residues())[0]

        # Modify parent id
        parent = residue1.parent
        parent.id = "X"

        # Write the single residue to temp file
        self.io.set_structure(residue1)
        filenumber, filename = tempfile.mkstemp()
        os.close(filenumber)
        try:
            self.io.save(filename)
            struct2 = self.mmcif_parser.get_structure("1a8o", filename)
            nresidues = len(list(struct2.get_residues()))
            self.assertEqual(nresidues, 1)

            # Assert chain remained the same
            chain_id = [c.id for c in struct2.get_chains()][0]
            self.assertEqual(chain_id, "X")
        finally:
            os.remove(filename)

    def test_mmcifio_write_residue_wout_chain(self):
        """Write a single orphan residue using MMCIFIO."""
        # Work on a copy: detaching the residue on the shared class-level
        # structure would leak into every test that runs afterwards.
        struct1 = self.structure.copy()
        residue1 = list(struct1.get_residues())[0]

        residue1.parent = None  # detach residue

        # Write the orphan residue to temp file
        self.io.set_structure(residue1)

        filenumber, filename = tempfile.mkstemp()
        os.close(filenumber)
        try:
            self.io.save(filename)
            struct2 = self.mmcif_parser.get_structure("1a8o", filename)
            nresidues = len(list(struct2.get_residues()))
            self.assertEqual(nresidues, 1)

            # Assert chain is default: "A"
            chain_id = [c.id for c in struct2.get_chains()][0]
            self.assertEqual(chain_id, "A")
        finally:
            os.remove(filename)

    def test_mmcifio_write_custom_residue(self):
        """Write a chainless residue using MMCIFIO."""
        res = Residue.Residue((" ", 1, " "), "DUM", "")
        atm = Atom.Atom("CA", [0.1, 0.1, 0.1], 1.0, 1.0, " ", "CA", 1, "C")
        res.add(atm)

        # Ensure that set_structure doesn't alter parent
        parent = res.parent

        # Write the custom residue to temp file
        self.io.set_structure(res)

        self.assertIs(parent, res.parent)
        filenumber, filename = tempfile.mkstemp()
        os.close(filenumber)
        try:
            self.io.save(filename)
            struct2 = self.mmcif_parser.get_structure("res", filename)
            latoms = list(struct2.get_atoms())
            self.assertEqual(len(latoms), 1)
            self.assertEqual(latoms[0].name, "CA")
            self.assertEqual(latoms[0].parent.resname, "DUM")
            self.assertEqual(latoms[0].parent.parent.id, "A")
        finally:
            os.remove(filename)

    def test_mmcifio_select(self):
        """Write a selection of the structure using a Select subclass."""

        # Selection class to filter all alpha carbons
        class CAonly(Select):
            """Accepts only CA atoms whose element is carbon."""

            def accept_atom(self, atom):
                return atom.name == "CA" and atom.element == "C"

        struct1 = self.structure
        # Write to temp file
        self.io.set_structure(struct1)
        filenumber, filename = tempfile.mkstemp()
        os.close(filenumber)
        try:
            self.io.save(filename, CAonly())
            struct2 = self.mmcif_parser.get_structure("1a8o", filename)
            nresidues = len(list(struct2.get_residues()))
            self.assertEqual(nresidues, 70)
        finally:
            os.remove(filename)

    def test_mmcifio_write_dict(self):
        """Write an mmCIF dictionary out, read it in and compare them."""
        d1 = MMCIF2Dict(self.mmcif_file)

        # Write to temp file
        self.io.set_dict(d1)
        filenumber, filename = tempfile.mkstemp()
        os.close(filenumber)
        try:
            self.io.save(filename)
            d2 = MMCIF2Dict(filename)
            k1 = sorted(d1.keys())
            k2 = sorted(d2.keys())
            self.assertEqual(k1, k2)
            for key in k1:
                self.assertEqual(d1[key], d2[key])
        finally:
            os.remove(filename)

    def test_mmcifio_multimodel(self):
        """Write a multi-model, multi-chain mmCIF file."""
        pdb_struct = self.pdb_parser.get_structure(
            "1SSU_mod_pdb", self.mmcif_multimodel_pdb_file)
        mmcif_struct = self.mmcif_parser.get_structure(
            "1SSU_mod_mmcif", self.mmcif_multimodel_mmcif_file)
        # The shared writer is used throughout; a separately created local
        # ``io = MMCIFIO()`` here was never used and has been dropped.
        for struct in (pdb_struct, mmcif_struct):
            self.io.set_structure(struct)
            filenumber, filename = tempfile.mkstemp()
            os.close(filenumber)
            try:
                self.io.save(filename)
                struct_in = self.mmcif_parser.get_structure(
                    "1SSU_mod_in", filename)
                self.assertEqual(len(struct_in), 2)
                self.assertEqual(len(struct_in[1]), 2)
                self.assertEqual(
                    round(float(struct_in[1]["B"][1]["N"].get_coord()[0]), 3),
                    6.259)
            finally:
                os.remove(filename)