Example #1
0
    def _read(self, structure, f_id, distance_cutoff, atom_type):
        """Read a contact file

        One contact map is built for every ordered pair of chains in a
        model: a chain paired with itself gives an intra-chain map named
        after the chain, two different chains give an inter-chain map named
        by concatenating both chain identifiers.

        Parameters
        ----------
        structure
           A :obj:`~Bio.PDB.Structure.Structure` instance
        f_id : str
           Unique contact file identifier
        distance_cutoff : int
           Distance cutoff for which to determine contacts; ``0`` keeps
           every atom pair reported for a chain pair
        atom_type : str
           Atom type between which distances are calculated

        Returns
        -------
        :obj:`~conkit.core.contactfile.ContactFile`
           The hierarchy of the first model in ``structure``

        Raises
        ------
        :exc:`IndexError`
           ``structure`` does not contain any model

        """
        hierarchies = []
        for model in structure:
            hierarchy = ContactFile(f_id + '_' + str(model.id))
            chains = list(model)

            for chain in chains:
                self._remove_hetatm(chain)
                self._remove_atom(chain, atom_type)

            for chain1, chain2 in itertools.product(chains, chains):
                # Decide intra vs. inter from the chain identifiers
                # themselves; the length of the map id is not a reliable
                # signal once a chain identifier has more than one character.
                is_intra = chain1.id == chain2.id
                if is_intra:
                    contact_map = ContactMap(chain1.id)
                else:
                    contact_map = ContactMap(chain1.id + chain2.id)

                for (atom1, atom2,
                     distance) in self._chain_contacts(chain1, chain2):
                    # Filter first so that no Contact is built only to be
                    # thrown away.
                    if not (distance_cutoff == 0
                            or distance < distance_cutoff):
                        continue

                    contact = Contact(atom1.resseq,
                                      atom2.resseq,
                                      round(1.0 - (distance / 100), 6),
                                      distance_bound=(0.,
                                                      float(distance_cutoff)))

                    contact.res1_altseq = atom1.resseq_alt
                    contact.res2_altseq = atom2.resseq_alt
                    contact.res1 = atom1.resname
                    contact.res2 = atom2.resname
                    contact.res1_chain = atom1.reschain
                    contact.res2_chain = atom2.reschain
                    contact.true_positive = True
                    contact_map.add(contact)

                # Chain pairs without a single contact are left out.
                if contact_map.empty:
                    continue

                if is_intra:
                    contact_map.sequence = self._build_sequence(chain1)
                    assert len(contact_map.sequence.seq) == len(chain1)
                else:
                    contact_map.sequence = self._build_sequence(chain1) \
                        + self._build_sequence(chain2)
                    assert len(contact_map.sequence.seq) \
                        == len(chain1) + len(chain2)
                hierarchy.add(contact_map)

            hierarchy.method = 'Contact map extracted from PDB ' + str(
                model.id)
            hierarchy.remark = [
                'The model id is the chain identifier, i.e XY equates to chain X and chain Y.',
                'Residue numbers in column 1 are chain X, and numbers in column 2 are chain Y.'
            ]
            hierarchies.append(hierarchy)

        if len(hierarchies) > 1:
            msg = "Super-level to contact file not yet implemented. " \
                  "Parser returns hierarchy for top model only!"
            warnings.warn(msg, FutureWarning)
        return hierarchies[0]
Example #2
0
    def read(self, f_handle, f_id="gremlin"):
        """Read a contact file

        Comment lines are stored as remarks, the intra/inter header lines
        switch the column layout expected for the data lines that follow,
        and every other non-blank line is parsed as one contact.

        Parameters
        ----------
        f_handle
           Open file handle [read permissions]
        f_id : str, optional
           Unique contact file identifier

        Returns
        -------
        :obj:`ContactFile <conkit.core.contactfile.ContactFile>`

        Raises
        ------
        :exc:`ValueError`
           The chain column of a contact line holds more than two characters

        """
        hierarchy = ContactFile(f_id)

        # Read the whole handle up front and discard blank lines.
        stripped = [raw.rstrip() for raw in f_handle]
        is_inter = False
        chain_keys = set()
        contacts = []

        for line in filter(None, stripped):
            comment = RE_COMMENT.match(line)
            if comment:
                hierarchy.remark = comment.group(1)
                continue
            if RE_HEADER_INTRA.match(line):
                is_inter = False
                continue
            if RE_HEADER_INTER.match(line):
                is_inter = True
                continue

            fields = RE_SPLIT.split(line)
            if is_inter:
                res1_seq, res2_seq, chain, _, _, raw_score, scalar_score, _, _ = fields
            else:
                res1_seq, res2_seq, _, _, raw_score, scalar_score, _ = fields
                chain = 'UNK'

            contact = Contact(int(res1_seq), int(res2_seq), float(raw_score))
            contact.scalar_score = float(scalar_score)

            if chain == 'UNK':
                chain_keys.add('UNK')
            elif len(chain) > 2:
                raise ValueError('Cannot distinguish between chains')
            elif chain:
                # One character means both residues share that chain; two
                # characters give the chain of residue 1 and residue 2.
                contact.res1_chain = chain[0]
                contact.res2_chain = chain[-1]
                chain_keys.add((contact.res1_chain, contact.res2_chain))

            contacts.append(contact)

        keys = list(chain_keys)
        only_one = len(keys) == 1
        for key in keys:
            if only_one and key == 'UNK':
                map_id = '1'
            elif key[0] == key[1]:
                map_id = key[0]
            else:
                map_id = "".join(key)

            contact_map = ContactMap(map_id)
            for contact in contacts:
                # A single key takes every contact; otherwise only those
                # whose chains match the key.
                if only_one or (contact.res1_chain == key[0]
                                and contact.res2_chain == key[1]):
                    contact_map.add(contact)
            hierarchy.add(contact_map)

        hierarchy.sort('id', inplace=True)
        return hierarchy
Example #3
0
    def read(self, f_handle, f_id="casp"):
        """Read a contact file into a :obj:`~conkit.core.contactfile.ContactFile` instance

        Header records (target, author, remark, method) are copied onto the
        contact file. Each model record opens a contact map that collects
        the following sequence and contact lines until a blank line, an
        end-of-model record or an end record is met.

        Parameters
        ----------
        f_handle
           Open file handle [read permissions]
        f_id : str, optional
           Unique contact file identifier

        Returns
        -------
        :obj:`~conkit.core.contactfile.ContactFile`

        Raises
        ------
        :exc:`ValueError`
           A line outside a model is not a recognised record, or a residue
           entry of a contact line cannot be separated into chain and
           residue number

        """

        def split_residue(entry):
            """Return ``(chain, residue number)`` for one residue column"""
            # Split in case we have chain in inter-molecular scenarios
            parts = RE_RES.split(entry)
            if len(parts) == 1:
                return "", parts[0]
            if len(parts) == 4:
                return parts[1], parts[2]
            # Fail loudly instead of reusing the values left over from the
            # previous contact line.
            raise ValueError(
                "Cannot parse residue entry '{}'".format(entry))

        lines = [l.strip() for l in f_handle.readlines()]
        contact_file = ContactFile(f_id)
        it = iter(lines)
        for line in it:
            if RE_PRFMAT.match(line):
                continue

            match = RE_TARGET.match(line)
            if match:
                contact_file.remark = match.group(1)
                continue

            match = RE_AUTHOR.match(line)
            if match:
                contact_file.author = match.group(1)
                continue

            match = RE_REMARK.match(line)
            if match:
                contact_file.remark = match.group(1)
                continue

            match = RE_METHOD.match(line)
            if match:
                contact_file.method = match.group(1)
                continue

            match = RE_MODEL.match(line)
            if match:
                contact_map = ContactMap(match.group(1))
                seq_chunks = []
                # The inner loop keeps consuming the shared iterator, so the
                # outer loop resumes right after the model's last line.
                for line in it:
                    if not line or RE_ENDMDL.match(line) \
                            or RE_END.match(line):
                        break
                    if RE_SEQ.match(line):
                        seq_chunks.append(line)
                        continue

                    res1_entry, res2_entry, lb, ub, raw_score = \
                        RE_SPLIT.split(line)
                    res1_chain, res1_seq = split_residue(res1_entry)
                    res2_chain, res2_seq = split_residue(res2_entry)
                    res1_num, res2_num = int(res1_seq), int(res2_seq)

                    contact = Contact(res1_num,
                                      res2_num,
                                      float(raw_score),
                                      distance_bound=(float(lb),
                                                      float(ub)))
                    contact.res1_chain = res1_chain
                    contact.res2_chain = res2_chain
                    contact.res1_altseq = res1_num
                    contact.res2_altseq = res2_num
                    contact_map.add(contact)

                if seq_chunks:
                    seq = "".join(seq_chunks)
                    sequence = Sequence("seq_{}".format(contact_map.id), seq)
                    contact_map.sequence = sequence
                    contact_map.set_sequence_register()
                contact_file.add(contact_map)
                continue

            if RE_END.match(line):
                break

            raise ValueError(
                "Unrecognized line type. Please report this issue")
        return contact_file
Example #4
0
    def read(self, f_handle, f_id="pcons"):
        """Read a contact file

        All contacts are collected in a single contact map with the
        identifier ``"1"``. If the file carries a sequence section, its
        lines are joined and attached to that map as its sequence.

        Parameters
        ----------
        f_handle
           Open file handle [read permissions]
        f_id : str, optional
           Unique contact file identifier

        Returns
        -------
        :obj:`~conkit.core.contactfile.ContactFile`

        """
        contact_file = ContactFile(f_id)
        contact_map = ContactMap("1")
        contact_file.add(contact_map)

        # Blank lines are dropped here, so the loops below never see one.
        lines = iter([l.rstrip() for l in f_handle if l.rstrip()])
        done = object()
        line = next(lines, done)

        seq_chunks = []
        seq_id = 'seq_1'

        while line is not done:

            if RE_GENERATED.match(line):
                contact_file.remark = line

            elif RE_SEQUENCE_NAME.match(line):
                seq_id = RE_SEQUENCE_NAME.match(line).group(1)

            elif RE_SEQUENCE.match(line):
                # Everything up to the next contact-related line belongs to
                # the sequence.
                line = next(lines, done)
                while line is not done:
                    if RE_CONTACT_HEADER.match(line) \
                            or RE_PRED_CONTACTS.match(line) \
                            or RE_CONTACT.match(line):
                        break
                    seq_chunks.append(line)
                    line = next(lines, done)

                # The file ended inside the sequence section; the sentinel
                # must not be handed to the regular expression below.
                if line is done:
                    break

            # Deliberately not part of the chain above: the line that ended
            # the sequence section may itself be the first contact.
            if RE_CONTACT.match(line):
                res1_seq, res2_seq, raw_score = line.split()
                contact = Contact(int(res1_seq), int(res2_seq),
                                  float(raw_score))
                contact_map.add(contact)

            line = next(lines, done)

        if seq_chunks:
            contact_map.sequence = Sequence(seq_id, "".join(seq_chunks))

        contact_file.method = 'Contact map predicted using Pcons'

        return contact_file
Example #5
0
 def test_remark_5(self):
     # Adding a contact map must leave the stored remark as it was.
     cfile = ContactFile("test")
     cfile.remark = "hello"
     cfile.add(ContactMap("foo"))
     expected = ["hello"]
     self.assertEqual(expected, cfile.remark)
Example #6
0
 def test_remark_2(self):
     # Two consecutive assignments are both kept, in assignment order.
     cfile = ContactFile("test")
     for text in ("Hello", "World"):
         cfile.remark = text
     expected = ["Hello", "World"]
     self.assertEqual(expected, cfile.remark)
Example #7
0
 def test_remark_1(self):
     # A single assigned string is read back wrapped in a list.
     cfile = ContactFile("test")
     cfile.remark = "Hello"
     expected = ["Hello"]
     self.assertEqual(expected, cfile.remark)