Beispiel #1
0
 def test_res1_chain_1(self):
     """Check that ``res1_chain`` reads as '' at first and returns each assigned value."""
     contact = Contact(1, 2, 1.0)
     # Nothing has been assigned yet
     self.assertEqual('', contact.res1_chain)
     # Upper- and lower-case identifiers must both be read back unchanged
     for chain_id in ('A', 'd'):
         contact.res1_chain = chain_id
         self.assertEqual(chain_id, contact.res1_chain)
Beispiel #2
0
 def test_res1_chain_1(self):
     """Verify the initial value and the assignment round-trip of ``res1_chain``."""
     contact = Contact(1, 2, 1.0)
     expected_values = ("A", "d")
     # Before any assignment the attribute is expected to be the empty string
     self.assertEqual("", contact.res1_chain)
     for expected in expected_values:
         contact.res1_chain = expected
         self.assertEqual(expected, contact.res1_chain)
Beispiel #3
0
 def test_write_6(self):
     """Write a contact map with chain identifiers via ``CaspParser`` and compare the file line by line.

     The temporary file is removed in a ``finally`` clause so that it does
     not leak when writing, reading or the final comparison fails.
     """
     contact_file = ContactFile('RR')
     contact_map = ContactMap('1')
     contact_file.add(contact_map)
     # Each row: chain 1, residue 1, chain 2, residue 2, lower bound, upper bound, score
     rows = [
         ('A', 1, 'B', 9, 0, 8, 0.7),
         ('A', 1, 'B', 10, 0, 8, 0.7),
         ('A', 2, 'B', 8, 0, 8, 0.9),
         ('A', 3, 'B', 12, 0, 8, 0.4),
     ]
     for chain1, seq1, chain2, seq2, lower, upper, score in rows:
         contact = Contact(seq1, seq2, score, distance_bound=(lower, upper))
         contact.res1_chain = chain1
         contact.res2_chain = chain2
         contact_map.add(contact)
     contact_map.sequence = Sequence(
         '1',
         'HLEGSIGILLKKHEIVFDGCHDFGRTYIWQMSDHLEGSIGILLKKHEIVFDGCHDFGRTYIWQMSD'
     )
     content = [
         "PFRMAT RR",
         "MODEL  1",
         "HLEGSIGILLKKHEIVFDGCHDFGRTYIWQMSDHLEGSIGILLKKHEIVF",
         "DGCHDFGRTYIWQMSD",
         "A1    B9    0   8   0.700000",
         "A1    B10   0   8   0.700000",
         "A2    B8    0   8   0.900000",
         "A3    B12   0   8   0.400000",
         "ENDMDL",
         "END",
     ]
     f_name = create_tmp_f()
     try:
         with open(f_name, 'w') as f_out:
             CaspParser().write(f_out, contact_file)
         with open(f_name, 'r') as f_in:
             output = f_in.read().splitlines()
     finally:
         # Always clean up, also when the parser or the file access raised
         os.unlink(f_name)
     self.assertEqual(content, output)
Beispiel #4
0
    def read(self, f_handle, f_id="ncont"):
        """Parse contact lines from an open file into a single-map contact file

        Lines that do not match ``RE_CONTACT`` are skipped. When a residue
        pair is already present in the contact map, a warning is issued and
        the later entry is dropped.

        Parameters
        ----------
        f_handle
           Open file handle [read permissions]
        f_id : str, optional
           Unique contact file identifier

        Returns
        -------
        :obj:`~conkit.core.contactfile.ContactFile`

        """
        duplicate_msg = (
            "This parser cannot handle multiple atoms of the same residue. "
            "If your contact map contains such entries, only the first will be stored!"
        )

        contact_file = ContactFile(f_id)
        contact_map = ContactMap("map_1")
        contact_file.add(contact_map)

        for raw_line in f_handle:
            hit = RE_CONTACT.match(raw_line.strip())
            if hit is None:
                continue

            res1_seq = int(hit.group(2))
            res2_seq = int(hit.group(5))
            # The single parsed value serves as both lower and upper bound
            bound = float(hit.group(7))

            if (res1_seq, res2_seq) in contact_map:
                warnings.warn(duplicate_msg, Warning)
                continue

            contact = Contact(res1_seq, res2_seq, 1.0, distance_bound=(bound, bound))
            contact.res1_chain = hit.group(1)
            contact.res2_chain = hit.group(4)
            contact.res1 = hit.group(3)
            contact.res2 = hit.group(6)
            contact_map.add(contact)

        contact_file.method = "Contact map generated using Ncont"
        return contact_file
Beispiel #5
0
 def test_write_6(self):
     """Write a contact map with chain identifiers via ``CaspParser`` and compare the file line by line."""
     # Each row: chain 1, residue 1, chain 2, residue 2, lower bound, upper bound, score
     rows = (
         ("A", 1, "B", 9, 0, 8, 0.7),
         ("A", 1, "B", 10, 0, 8, 0.7),
         ("A", 2, "B", 8, 0, 8, 0.9),
         ("A", 3, "B", 12, 0, 8, 0.4),
     )
     expected = [
         "PFRMAT RR",
         "MODEL  1",
         "HLEGSIGILLKKHEIVFDGCHDFGRTYIWQMSDHLEGSIGILLKKHEIVF",
         "DGCHDFGRTYIWQMSD",
         "A1    B9    0   8   0.700000",
         "A1    B10   0   8   0.700000",
         "A2    B8    0   8   0.900000",
         "A3    B12   0   8   0.400000",
         "ENDMDL",
         "END",
     ]

     contact_file = ContactFile("RR")
     contact_map = ContactMap("1")
     contact_file.add(contact_map)
     for chain1, seq1, chain2, seq2, lower, upper, score in rows:
         contact = Contact(seq1, seq2, score, distance_bound=(lower, upper))
         contact.res1_chain = chain1
         contact.res2_chain = chain2
         contact_map.add(contact)
     contact_map.sequence = Sequence(
         "1",
         "HLEGSIGILLKKHEIVFDGCHDFGRTYIWQMSDHLEGSIGILLKKHEIVFDGCHDFGRTYIWQMSD"
     )

     f_name = self.tempfile()
     with open(f_name, "w") as f_out:
         CaspParser().write(f_out, contact_file)
     with open(f_name, "r") as f_in:
         written = f_in.read().splitlines()
     self.assertEqual(expected, written)
Beispiel #6
0
    def read(self, f_handle, f_id="casp"):
        """Read a contact file into a :obj:`conkit.core.contactfile.ContactFile` instance

        Header lines are copied onto the contact file. Every line matching
        ``RE_MODEL`` starts a new contact map, which collects sequence and
        contact lines until a line matching ``RE_ENDMDL`` or ``RE_END``, an
        empty line, or the end of the input is reached.

        Parameters
        ----------
        f_handle
           Open file handle [read permissions]
        f_id : str, optional
           Unique contact file identifier

        Returns
        -------
        :obj:`ContactFile <conkit.core.contactfile.ContactFile>`

        Raises
        ------
        ValueError
           If a top-level line is of no recognized type, a contact line does
           not split into five fields, or a residue entry cannot be split
           into an optional chain and a residue number

        """

        def split_residue(entry):
            """Split a residue entry into ``(chain, residue number)`` strings"""
            parts = RE_RES.split(entry)
            if len(parts) == 1:
                # No chain prefix, e.g. intra-molecular contacts
                return '', parts[0]
            if len(parts) == 4:
                # Chain prefix present, e.g. inter-molecular contacts
                return parts[1], parts[2]
            # Previously fell through and reused stale values from the
            # preceding contact line (or failed with an unbound local)
            raise ValueError('Unrecognized residue entry: {0}'.format(entry))

        lines = [raw.strip() for raw in f_handle.readlines()]

        contact_file = ContactFile(f_id)

        it = iter(lines)
        for line in it:

            if RE_PRFMAT.match(line):
                continue

            elif RE_TARGET.match(line):
                # NOTE(review): the target is stored in `remark`, the same
                # attribute the REMARK branch writes - confirm this is intended
                contact_file.remark = RE_TARGET.match(line).group(1)

            elif RE_AUTHOR.match(line):
                contact_file.author = RE_AUTHOR.match(line).group(1)

            elif RE_REMARK.match(line):
                contact_file.remark = RE_REMARK.match(line).group(1)

            elif RE_METHOD.match(line):
                contact_file.method = RE_METHOD.match(line).group(1)

            elif RE_MODEL.match(line):
                contact_map = ContactMap(RE_MODEL.match(line).group(1))

                seq_chunks = []

                # The model body is consumed from the same iterator, so the
                # outer loop resumes after the line that closed the model
                for record in it:

                    if not record:
                        break

                    if RE_ENDMDL.match(record) or RE_END.match(record):
                        break

                    if RE_SEQ.match(record):
                        seq_chunks.append(record)
                        continue

                    res1_entry, res2_entry, lb, ub, raw_score = RE_SPLIT.split(record)

                    res1_chain, res1_seq = split_residue(res1_entry)
                    res2_chain, res2_seq = split_residue(res2_entry)

                    contact = Contact(int(res1_seq), int(res2_seq), float(raw_score),
                                      distance_bound=(float(lb), float(ub)))
                    contact.res1_chain = res1_chain
                    contact.res2_chain = res2_chain
                    contact.res1_altseq = int(res1_seq)
                    contact.res2_altseq = int(res2_seq)
                    contact_map.add(contact)

                if seq_chunks:
                    seq = "".join(seq_chunks)
                    sequence = Sequence('seq_{0}'.format(contact_map.id), seq)
                    contact_map.sequence = sequence
                    contact_map.assign_sequence_register()
                contact_file.add(contact_map)

            elif RE_END.match(line):
                break

            else:
                raise ValueError('Unrecognized line type. Please report this issue')

        return contact_file
Beispiel #7
0
    def _read(self, structure, f_id, distance_cutoff, atom_type):
        """Read a contact file

        For every model, each chain is first passed through
        ``_remove_hetatm`` and ``_remove_atom``; afterwards one contact map is
        built per ordered chain pair (including each chain with itself) from
        the tuples yielded by ``_chain_contacts``. Maps without contacts are
        not stored.

        Parameters
        ----------
        structure
           A :obj:`Structure <Bio.PDB.Structure.Structure>` instance
        f_id : str
           Unique contact file identifier
        distance_cutoff : int
           Distance cutoff for which to determine contacts; ``0`` keeps every
           atom pair that is yielded
        atom_type : str
           Atom type between which distances are calculated

        Returns
        -------
        :obj:`ContactFile <conkit.core.contactfile.ContactFile>`
           The contact file of the first model only

        Raises
        ------
        IndexError
           If ``structure`` does not contain any model

        """
        hierarchies = []
        for model in structure:
            hierarchy = ContactFile(f_id + '_' + str(model.id))
            chains = list(model)

            for chain in chains:
                self._remove_hetatm(chain)
                self._remove_atom(chain, atom_type)

            for chain1, chain2 in itertools.product(chains, chains):
                # Decide intra vs. inter once and reuse it below; inferring it
                # from the length of the map id breaks for chain ids that are
                # not exactly one character long
                is_intra = chain1.id == chain2.id
                if is_intra:
                    contact_map = ContactMap(chain1.id)
                else:
                    contact_map = ContactMap(chain1.id + chain2.id)

                for (atom1, atom2, distance) in self._chain_contacts(chain1, chain2):
                    contact = Contact(
                        atom1.resseq,
                        atom2.resseq,
                        round(1.0 - (distance / 100), 6),
                        distance_bound=(0., float(distance_cutoff)))

                    contact.res1_altseq = atom1.resseq_alt
                    contact.res2_altseq = atom2.resseq_alt
                    contact.res1 = atom1.resname
                    contact.res2 = atom2.resname
                    contact.res1_chain = atom1.reschain
                    contact.res2_chain = atom2.reschain

                    if distance_cutoff == 0 or distance < distance_cutoff:
                        contact.define_match()
                        contact_map.add(contact)

                if contact_map.empty:
                    continue

                if is_intra:
                    contact_map.sequence = self._build_sequence(chain1)
                    assert len(contact_map.sequence.seq) == len(chain1)
                else:
                    contact_map.sequence = self._build_sequence(chain1) \
                        + self._build_sequence(chain2)
                    assert len(contact_map.sequence.seq) \
                        == len(chain1) + len(chain2)
                hierarchy.add(contact_map)

            hierarchy.method = 'Contact map extracted from PDB ' + str(model.id)
            hierarchy.remark = [
                'The model id is the chain identifier, i.e XY equates to chain X and chain Y.',
                'Residue numbers in column 1 are chain X, and numbers in column 2 are chain Y.'
            ]
            hierarchies.append(hierarchy)

        if len(hierarchies) > 1:
            msg = "Super-level to contact file not yet implemented. " \
                  "Parser returns hierarchy for top model only!"
            warnings.warn(msg, FutureWarning)
        return hierarchies[0]
Beispiel #8
0
    def read(self, f_handle, f_id="gremlin"):
        """Read a contact file

        Contacts are collected from all non-empty lines first and afterwards
        distributed over contact maps according to the chain pairs that were
        seen. If only contacts without chain information were found, a single
        contact map with id ``'1'`` is created.

        Parameters
        ----------
        f_handle
           Open file handle [read permissions]
        f_id : str, optional
           Unique contact file identifier

        Returns
        -------
        :obj:`ContactFile <conkit.core.contactfile.ContactFile>`

        Raises
        ------
        ValueError
           If the chain field of an inter-molecular contact is longer than
           two characters

        """

        def map_id_for(pair):
            # Identical chains collapse to one identifier, otherwise both are joined
            return pair[0] if pair[0] == pair[1] else "".join(pair)

        hierarchy = ContactFile(f_id)

        # Blank lines carry no information; drop them before parsing
        rows = [row for row in (raw.rstrip() for raw in f_handle) if row]

        inter = False
        chain_ids = set()
        contacts = []
        for row in rows:
            comment = RE_COMMENT.match(row)
            if comment:
                hierarchy.remark = comment.group(1)
                continue
            if RE_HEADER_INTRA.match(row):
                inter = False
                continue
            if RE_HEADER_INTER.match(row):
                inter = True
                continue

            fields = RE_SPLIT.split(row)
            if inter:
                res1_seq, res2_seq, chain, _, _, raw_score, scalar_score, _, _ = fields
            else:
                res1_seq, res2_seq, _, _, raw_score, scalar_score, _ = fields
                chain = 'UNK'

            contact = Contact(int(res1_seq), int(res2_seq), float(raw_score))
            contact.scalar_score = float(scalar_score)

            if chain == 'UNK':
                chain_ids.add('UNK')
            else:
                n_chars = len(chain)
                if n_chars > 2:
                    raise ValueError('Cannot distinguish between chains')
                if n_chars in (1, 2):
                    # A single character names both chains, two characters name one each
                    contact.res1_chain = chain[0]
                    contact.res2_chain = chain[-1]
                    chain_ids.add((contact.res1_chain, contact.res2_chain))

            contacts.append(contact)

        chain_ids = list(chain_ids)
        if len(chain_ids) == 1:
            # Only one kind of chain assignment was seen: all contacts share one map
            only = chain_ids[0]
            contact_map = ContactMap('1' if only == 'UNK' else map_id_for(only))
            for contact in contacts:
                contact_map.add(contact)
            hierarchy.add(contact_map)
        else:
            for pair in chain_ids:
                contact_map = ContactMap(map_id_for(pair))
                for contact in contacts:
                    if contact.res1_chain == pair[0] and contact.res2_chain == pair[1]:
                        contact_map.add(contact)
                hierarchy.add(contact_map)

        hierarchy.sort('id', inplace=True)
        return hierarchy