예제 #1
0
    def read(self, f_handle, f_id="casp2"):
        """Read a CASP RR mode 2 distance prediction file

        Only lines made of exactly 13 whitespace-separated fields whose first
        two fields are digit strings are parsed. Every other line is skipped
        without raising.

        Parameters
        ----------
        f_handle
           Open file handle [read permissions]
        f_id : str, optional
           Unique distance file identifier

        Returns
        -------
        :obj:`~conkit.core.distancefile.DistanceFile`

        Raises
        ------
        ValueError
           If a field of an accepted line cannot be converted to a number

        """

        hierarchy = DistanceFile(f_id)
        hierarchy.original_file_format = "CASPRR_MODE_2"
        _map = Distogram("distogram_1")
        hierarchy.add(_map)

        # Iterate the handle directly so the file is streamed instead of being
        # loaded into memory in one go by readlines().
        for line in f_handle:
            # split() without arguments already discards surrounding whitespace
            fields = line.split()
            if len(fields) != 13 or not (fields[0].isdigit() and fields[1].isdigit()):
                continue

            res1_seq = int(fields[0])
            res2_seq = int(fields[1])
            raw_score = float(fields[2])
            # The remaining 10 fields are the per-bin scores of this residue pair
            distance_scores = tuple(float(p) for p in fields[3:])
            _distance = Distance(res1_seq, res2_seq, distance_scores, DISTANCE_BINS, raw_score=raw_score)
            _map.add(_distance)

        return hierarchy
예제 #2
0
 def test_original_file_format(self):
     """A distogram added to a distance file reports the file's original format"""
     distance_file = DistanceFile("test")
     distance_file.original_file_format = "pdb"
     distogram = Distogram("test")
     distance_file.add(distogram)
     # assertIn reports both operands on failure, unlike assertTrue(x in y)
     self.assertIn(distogram, distance_file.child_list)
     self.assertEqual("pdb", distogram.original_file_format)
예제 #3
0
    def read(self, f_handle, f_id="rosettanpz"):
        """Read a distance prediction file loaded through :func:`numpy.load`

        The ``dist`` entry of the loaded data is read and one distance record
        is created for every residue pair ``(i, j)`` with ``i <= j``, using
        1-based residue numbers.

        Parameters
        ----------
        f_handle
           Open file handle [read permissions]
        f_id : str, optional
           Unique distance file identifier

        Returns
        -------
        :obj:`~conkit.core.distancefile.DistanceFile`

        """

        hierarchy = DistanceFile(f_id)
        hierarchy.original_file_format = "ROSETTA_NPZ"
        _map = Distogram("distogram_1")
        hierarchy.add(_map)

        # SECURITY NOTE(review): allow_pickle=True lets numpy unpickle arbitrary
        # objects, which can execute code. Only feed this parser trusted files.
        prediction = np.load(f_handle, allow_pickle=True)
        probs = prediction['dist']
        # Bin #0 corresponds with d>20A & bins #1 ~ #36 correspond with 2A<d<20A in increments of 0.5A
        # Reorder the last axis so that bin #0 comes last
        # (presumably to match the order of DISTANCE_BINS - TODO confirm).
        probs = probs[:, :, list(range(1, 37)) + [0]]

        n_residues = probs.shape[0]
        # Upper triangle including the diagonal; residue numbers are 1-based
        for i in range(n_residues):
            for j in range(i, n_residues):
                _distance = Distance(i + 1, j + 1, tuple(probs[i, j, :].tolist()), DISTANCE_BINS)
                _map.add(_distance)

        return hierarchy
예제 #4
0
    def test_write_1(self):
        expected_output = """PFRMAT RR
RMODE 2
1 6 0.199696 0.043889 0.085795 0.070011 0.071518 0.054028 0.213284 0.069087 0.097959 0.090083 0.204345
1 7 0.233644 0.049411 0.075135 0.109098 0.150810 0.096584 0.092398 0.096662 0.093350 0.123176 0.113375
1 8 0.246451 0.106886 0.039024 0.100540 0.082028 0.108344 0.078788 0.105980 0.130109 0.113708 0.134592
1 9 0.267139 0.072002 0.083053 0.112084 0.124356 0.128044 0.097491 0.132106 0.047198 0.110915 0.092751
1 10 0.351914 0.081445 0.069721 0.200748 0.099755 0.090368 0.117449 0.127677 0.050879 0.101965 0.059993
2 7 0.228459 0.085973 0.091366 0.051120 0.085890 0.070657 0.119253 0.082744 0.180051 0.097734 0.135213
2 8 0.256177 0.081094 0.077748 0.097335 0.060811 0.138077 0.130496 0.106911 0.101101 0.121346 0.085081
2 9 0.216631 0.046454 0.053018 0.117160 0.196036 0.144154 0.125199 0.090720 0.052621 0.098583 0.076055
2 10 0.284653 0.087567 0.125308 0.071778 0.071988 0.095966 0.099270 0.174715 0.109563 0.062611 0.101233
3 8 0.345583 0.117500 0.110134 0.117950 0.085312 0.098812 0.072826 0.079326 0.196758 0.059058 0.062325
3 9 0.203586 0.036574 0.050725 0.116287 0.174339 0.070881 0.116388 0.083683 0.060738 0.160257 0.130128
3 10 0.293849 0.059364 0.135117 0.099368 0.113124 0.135930 0.066876 0.075962 0.114771 0.127034 0.072454
4 9 0.234649 0.077170 0.048841 0.108638 0.107559 0.119732 0.116349 0.077063 0.111788 0.119497 0.113362
4 10 0.322930 0.090789 0.133412 0.098729 0.099123 0.084633 0.107534 0.137072 0.096560 0.042234 0.109913
5 10 0.279782 0.054314 0.114427 0.111042 0.069073 0.083048 0.105829 0.073806 0.119769 0.088666 0.180028"""

        distancefile = DistanceFile("test")
        distancefile.original_file_format = 'ALPHAFOLD2'
        distogram = Distogram("1")
        distancefile.add(distogram)

        list_res1 = [1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 5]
        list_res2 = [6, 7, 8, 9, 10, 7, 8, 9, 10, 8, 9, 10, 9, 10, 10]
        bin_edges = (2.3125, 2.625, 2.9375, 3.25, 3.5625, 3.875, 4.1875, 4.5,
                     4.8125, 5.125, 5.4375, 5.75, 6.0625, 6.375, 6.6875,
                     6.9999995, 7.3125, 7.625, 7.9375, 8.25, 8.5625, 8.875,
                     9.1875, 9.5, 9.812499, 10.124999, 10.4375, 10.75, 11.0625,
                     11.375, 11.687499, 12., 12.3125, 12.625, 12.9375, 13.25,
                     13.5625, 13.874999, 14.187501, 14.499999, 14.812499,
                     15.124999, 15.437499, 15.75, 16.0625, 16.375, 16.687502,
                     16.999998, 17.312498, 17.624998, 17.937498, 18.25,
                     18.5625, 18.875, 19.1875, 19.5, 19.8125, 20.125,
                     20.437498, 20.75, 21.062498, 21.374998, 21.6875)
        distance_bins = [(0, bin_edges[0])]
        distance_bins += [(bin_edges[idx], bin_edges[idx + 1])
                          for idx in range(len(bin_edges) - 1)]
        distance_bins.append((bin_edges[-1], np.inf))
        distance_bins = tuple(distance_bins)

        np.random.seed(41)
        for res_1, res_2 in zip(list_res1, list_res2):
            distance_scores = np.random.dirichlet(np.ones(64)).tolist()
            distance = Distance(res_1, res_2, distance_scores, distance_bins)
            distogram.add(distance)

        f_name = self.tempfile()
        with open(f_name, "w") as f_out:
            CaspMode2Parser().write(f_out, distogram)

        with open(f_name, "r") as f_in:
            output = f_in.read().splitlines()

        self.assertListEqual(expected_output.split('\n'), output)
예제 #5
0
    def test_write_1(self):
        expected_output = """#REMARK MapPred 1.1
#REMARK idx_i, idx_j, distance distribution of 34 bins
#REMARK 34 bins consist of 32 normal bins (4-20A with a step of 0.5A) and two boundary bins ( [0,4) and [20, inf) ), as follows: [0,4,4.5,5,5.5,6,6.5,7,7.5,8,8.5,9,9.5,10,10.5,11,11.5,12,12.5,13,13.5,14,14.5,15,15.5,16,16.5,17,17.5,18,18.5,19,19.5,20,inf]
5 10 0.013746 0.002245 0.053742 0.002115 0.005889 0.044058 0.010081 0.052535 0.118677 0.025818 0.019215 0.015831 0.009808 0.018148 0.031220 0.003428 0.058081 0.017978 0.065069 0.024163 0.044585 0.062025 0.026062 0.023824 0.012573 0.027729 0.022212 0.041685 0.005015 0.064340 0.004133 0.006420 0.018552 0.048998
1 35 0.187103 0.008180 0.021642 0.051089 0.038619 0.006100 0.010553 0.031697 0.010831 0.015310 0.006949 0.008237 0.043400 0.051436 0.003820 0.008148 0.018467 0.057307 0.022873 0.029184 0.008235 0.008025 0.004214 0.027027 0.070948 0.028355 0.049284 0.060124 0.041885 0.043900 0.000681 0.006836 0.007679 0.011862
43 85 0.024968 0.014838 0.021987 0.031265 0.019144 0.033038 0.018177 0.008716 0.017331 0.046459 0.051147 0.043912 0.004041 0.007990 0.027690 0.073997 0.001269 0.008161 0.067709 0.055700 0.028615 0.091884 0.021842 0.025949 0.025295 0.006136 0.031655 0.028990 0.082802 0.005069 0.002322 0.015611 0.039637 0.016654
85 43 0.015871 0.013765 0.006593 0.014670 0.029273 0.042705 0.058513 0.014858 0.050493 0.014216 0.010146 0.037020 0.018679 0.003142 0.031215 0.011736 0.008920 0.007325 0.144325 0.003512 0.018591 0.005043 0.001607 0.043659 0.068744 0.052532 0.050643 0.039295 0.003413 0.035119 0.102032 0.004150 0.005737 0.032456
50 50 0.000490 0.027392 0.001090 0.009625 0.011421 0.002011 0.015100 0.018622 0.008785 0.114531 0.044962 0.019562 0.022973 0.008111 0.042691 0.061367 0.001060 0.032753 0.073944 0.006790 0.002509 0.073759 0.025060 0.031361 0.039123 0.043318 0.032752 0.004280 0.044655 0.000556 0.000111 0.095043 0.028036 0.056157
18 50 0.002704 0.015000 0.024442 0.105520 0.014259 0.027628 0.002832 0.035063 0.038354 0.055931 0.039683 0.035546 0.004621 0.019932 0.012316 0.087781 0.006637 0.043857 0.008459 0.053482 0.016937 0.083507 0.031733 0.000793 0.004304 0.066937 0.009968 0.006859 0.038950 0.064003 0.003185 0.008042 0.007331 0.023401"""
        distancefile = DistanceFile("test")
        distancefile.original_file_format = 'MAPPRED'
        distogram = Distogram("1")
        distancefile.add(distogram)

        list_res1 = [5, 1, 43, 85, 50, 18]
        list_res2 = [10, 35, 85, 43, 50, 50]
        distance_bins = ((0, 4), (4, 4.5), (4.5, 5), (5, 5.5), (5.5, 6),
                         (6, 6.5), (6.5, 7), (7, 7.5), (7.5, 8), (8, 8.5),
                         (8.5, 9), (9, 9.5), (9.5, 10), (10, 10.5), (10.5, 11),
                         (11, 11.5), (11.5, 12), (12, 12.5), (12.5, 13),
                         (13, 13.5), (13.5, 14), (14, 14.5), (14.5, 15),
                         (15, 15.5), (15.5, 16), (16, 16.5), (16.5, 17),
                         (17, 17.5), (17.5, 18), (18, 18.5), (18.5, 19),
                         (19, 19.5), (19.5, 20), (20, np.inf))

        np.random.seed(41)
        for res_1, res_2 in zip(list_res1, list_res2):
            distance_scores = np.random.dirichlet(np.ones(34)).tolist()
            distance = Distance(res_1, res_2, distance_scores, distance_bins)
            distogram.add(distance)

        f_name = self.tempfile()
        with open(f_name, "w") as f_out:
            MapPredParser().write(f_out, distogram)

        with open(f_name, "r") as f_in:
            output = f_in.read().splitlines()

        self.assertListEqual(expected_output.split("\n"), output)
예제 #6
0
    def read(self, f_handle, f_id="alphafold2"):
        """Read a distance prediction file loaded through :func:`numpy.load`

        The ``distogram`` entry of the loaded data supplies ``logits`` and
        ``bin_edges``. One distance record is created for every residue pair
        ``(i, j)`` with ``i <= j``, using 1-based residue numbers.

        Parameters
        ----------
        f_handle
           Open file handle [read permissions]
        f_id : str, optional
           Unique distance file identifier

        Returns
        -------
        :obj:`~conkit.core.distancefile.DistanceFile`

        """

        hierarchy = DistanceFile(f_id)
        # NOTE(review): sibling parsers use upper-case format tags such as
        # "ROSETTA_NPZ" and "CASPRR_MODE_2"; confirm the lower-case tag is intended.
        hierarchy.original_file_format = "alphafold2"
        _map = Distogram("distogram_1")
        hierarchy.add(_map)

        # SECURITY NOTE(review): allow_pickle=True lets numpy unpickle arbitrary
        # objects, which can execute code. Only feed this parser trusted files.
        prediction = np.load(f_handle, allow_pickle=True)
        predicted_distogram = prediction['distogram']
        # Softmax over the last axis; the result is used as the per-bin scores
        probs = softmax(predicted_distogram['logits'], axis=-1)
        bin_edges = predicted_distogram['bin_edges']

        # N edges delimit N + 1 bins: (0, e0), (e0, e1), ..., (eN-1, inf)
        distance_bins = [(0, bin_edges[0])]
        distance_bins.extend(zip(bin_edges[:-1], bin_edges[1:]))
        distance_bins.append((bin_edges[-1], np.inf))
        distance_bins = tuple(distance_bins)

        n_residues = probs.shape[0]
        # Upper triangle including the diagonal; residue numbers are 1-based
        for i in range(n_residues):
            for j in range(i, n_residues):
                _distance = Distance(i + 1, j + 1, tuple(probs[i, j, :].tolist()), distance_bins)
                _map.add(_distance)

        return hierarchy
예제 #7
0
def DistanceFile(*args, **kwargs):
    """:obj:`DistanceFile <conkit.core.distancefile.DistanceFile>` instance"""
    # The import is deferred until the factory is actually called
    # (presumably to avoid import cycles or import-time cost - TODO confirm).
    from conkit.core.distancefile import DistanceFile as _distance_file_cls

    return _distance_file_cls(*args, **kwargs)
예제 #8
0
    def _read(self, structure, f_id, distance_cutoff, atom_type):
        """Build a distance file from a parsed structure

        A :obj:`~conkit.core.distancefile.DistanceFile` is assembled for every
        model in ``structure``, holding one distogram per ordered pair of
        chains that yields at least one distance. Only the hierarchy of the
        first model is returned.

        Parameters
        ----------
        structure
           A :obj:`~Bio.PDB.Structure.Structure` instance
        f_id : str
           Unique distance file identifier
        distance_cutoff : int
           Distance cutoff below which a residue pair is flagged as a true positive;
           ``0`` flags every pair
        atom_type : str
           Atom type between which distances are calculated

        Returns
        -------
        :obj:`~conkit.core.distancefile.DistanceFile`
           Hierarchy built from the first model of ``structure``

        Raises
        ------
        IndexError
           If ``structure`` yields no models

        Warns
        -----
        FutureWarning
           If ``structure`` holds more than one model

        """
        hierarchies = []
        distance_bound = (0.0, float(distance_cutoff))
        for model in structure:
            hierarchy = DistanceFile(f_id + "_" + str(model.id))
            hierarchy.original_file_format = "PDB"
            chains = list(model)

            for chain in chains:
                self._remove_hetatm(chain)
                self._remove_atom(chain, atom_type)

            for chain1, chain2 in itertools.product(chains, chains):
                # Compare the chain ids directly instead of inferring intra/inter
                # from the length of the distogram id, which gives the wrong answer
                # for chain ids that are not exactly one character long.
                is_intra = chain1.id == chain2.id
                distogram = Distogram(chain1.id if is_intra else chain1.id + chain2.id)

                for atom1, atom2, distance in self._chain_contacts(chain1, chain2):
                    if distance < distance_cutoff:
                        score = round(1.0 - (distance / 100), 6)
                    else:
                        score = 0

                    # Single bin (distance, distance) with probability 1
                    dist = Distance(atom1.resseq, atom2.resseq, (1, ),
                                    ((distance, distance), ), score,
                                    distance_bound)
                    dist.res1_altseq = atom1.resseq_alt
                    dist.res2_altseq = atom2.resseq_alt
                    dist.res1 = atom1.resname
                    dist.res2 = atom2.resname
                    dist.res1_chain = atom1.reschain
                    dist.res2_chain = atom2.reschain

                    if distance_cutoff == 0 or distance < distance_cutoff:
                        dist.true_positive = True

                    distogram.add(dist)

                # Chain pairs without any distance are left out of the hierarchy
                if distogram.empty:
                    continue

                if is_intra:
                    distogram.sequence = self._build_sequence(chain1)
                    assert len(distogram.sequence.seq) == len(chain1)
                else:
                    distogram.sequence = self._build_sequence(chain1) + self._build_sequence(chain2)
                    assert len(distogram.sequence.seq) == len(chain1) + len(chain2)
                hierarchy.add(distogram)

            hierarchy.method = "Distogram extracted from PDB " + str(model.id)
            hierarchy.remark = [
                "The model id is the chain identifier, i.e XY equates to chain X and chain Y.",
                "Residue numbers in column 1 are chain X, and numbers in column 2 are chain Y.",
            ]
            hierarchies.append(hierarchy)

        if len(hierarchies) > 1:
            msg = "Super-level to contact file not yet implemented. Parser returns hierarchy for top model only!"
            warnings.warn(msg, FutureWarning)
        return hierarchies[0]