Example #1
0
 def test_get_probability_within_distance_2(self):
     # The distance has a single bin whose two edges are identical and is
     # attached to a distogram flagged as 'pdb'; the probability is expected
     # to be exactly 1 above that value and exactly 0 below it.
     pdb_distogram = Distogram('test')
     pdb_distogram.original_file_format = 'pdb'
     observed = Distance(36, 86, (1, ), ((6.589181, 6.589181), ), 0.934108)
     pdb_distogram.add(observed)
     for cutoff, expected in ((8, 1), (5, 0)):
         self.assertEqual(expected, observed.get_probability_within_distance(cutoff))
Example #2
0
 def test_reshape_bins_2(self):
     # Re-binning a distance that sits in a 'pdb' distogram must raise.
     pdb_distogram = Distogram('test')
     pdb_distogram.original_file_format = 'pdb'
     observed = Distance(36, 86, (1, ), ((6.589181, 6.589181), ), 0.934108)
     pdb_distogram.add(observed)
     requested_bins = ((0, 1), (1, 10), (10, np.inf))
     with self.assertRaises(ValueError):
         observed.reshape_bins(requested_bins)
Example #3
0
 def dist_bins(self, dist_bins):
     """Store the distance bins, using a built-in default when ``None`` is given

     Parameters
     ----------
     dist_bins : tuple or None
        The bins to store, or ``None`` to select the default bins. Any value
        other than ``None`` is passed to ``Distance._assert_valid_bins``
        before it is stored.
     """
     if dist_bins is not None:
         Distance._assert_valid_bins(dist_bins)
         self._dist_bins = dist_bins
         return

     # Default: one (0, 4) bin, 2-unit wide bins up to 20, then an open-ended bin
     self._dist_bins = (
         (0, 4),
         (4, 6),
         (6, 8),
         (8, 10),
         (10, 12),
         (12, 14),
         (14, 16),
         (16, 18),
         (18, 20),
         (20, np.inf),
     )
Example #4
0
 def test_get_unique_distances_1(self):
     # Pairs (1, 25) and (7, 19) are each added in both orientations.
     bins = ((0, 4), (4, 6), (6, 8), (8, np.inf))
     entries = (
         (1, 25, (0.25, 0.45, 0.25, 0.05)),
         (25, 1, (0.25, 0.45, 0.25, 0.05)),
         (7, 19, (0.15, 0.15, 0.60, 0.1)),
         (19, 7, (0.15, 0.15, 0.60, 0.1)),
         (1, 7, (0.1, 0.2, 0.55, 0.15)),
     )
     distogram = Distogram("test")
     for res1, res2, scores in entries:
         distogram.add(Distance(res1, res2, scores, bins))
     distogram.get_unique_distances(inplace=True)
     self.assertListEqual([[25, 1], [19, 7], [1, 7]], distogram.as_list())
Example #5
0
 def test_reshape_bins_1(self):
     # Five original bins are re-shaped onto three coarser ones.
     old_scores = (0.15, 0.45, 0.25, 0.05, 0.1)
     old_bins = ((0, 4), (4, 6), (6, 8), (8, 10), (10, np.inf))
     distance = Distance(1, 25, old_scores, old_bins)
     new_bins = ((0, 2), (2, 8), (8, np.inf))
     distance.reshape_bins(new_bins)
     self.assertEqual(distance.raw_score, 0.85)
     within_8 = round(distance.get_probability_within_distance(8), 2)
     self.assertEqual(within_8, 0.85)
     self.assertTupleEqual(new_bins, distance.distance_bins)
     expected_scores = (0.075, 0.775, 0.15000000000000002)
     self.assertTupleEqual(expected_scores, distance.distance_scores)
Example #6
0
    def test_find_residues_within_1(self):
        # Four distances sharing the same five bins, over a five-residue sequence.
        bins = ((0, 4), (4, 6), (6, 8), (8, 10), (10, np.inf))
        distogram_1 = Distogram("test_1")
        for res1, res2, scores in (
                (1, 5, (0.25, 0.45, 0.05, 0.05, 0.2)),
                (2, 3, (0.15, 0.15, 0.60, 0.1, 0.0)),
                (1, 4, (0.05, 0.2, 0.0, 0.6, 0.15)),
                (3, 5, (0.4, 0.1, 0.35, 0.05, 0.1)),
        ):
            distogram_1.add(Distance(res1, res2, scores, bins))
        distogram_1.sequence = conkit.core.Sequence("test_seq", "AAAAA")

        found = distogram_1.find_residues_within(3, 7)

        self.assertSetEqual({2, 3, 5}, found)
Example #7
0
 def test_as_contactmap_1(self):
     # Only three of the four distances are expected in the resulting contact map.
     bins = ((0, 4), (4, 6), (6, 8), (8, 10), (10, np.inf))
     distogram = Distogram("test")
     for res1, res2, scores in (
             (1, 5, (0.25, 0.45, 0.05, 0.05, 0.2)),
             (2, 3, (0.15, 0.15, 0.60, 0.1, 0.0)),
             (1, 4, (0.05, 0.2, 0.0, 0.6, 0.15)),
             (3, 5, (0.4, 0.1, 0.35, 0.05, 0.1)),
     ):
         distogram.add(Distance(res1, res2, scores, bins))
     contactmap = distogram.as_contactmap()

     observed_res1 = [contact.res1_seq for contact in contactmap]
     self.assertListEqual(observed_res1, [1, 2, 3])
     observed_res2 = [contact.res2_seq for contact in contactmap]
     self.assertListEqual(observed_res2, [5, 3, 5])
     observed_raw_score = [contact.raw_score for contact in contactmap]
     self.assertListEqual(observed_raw_score, [0.75, 0.8999999999999999, 0.85])
Example #8
0
    def test_merge_arrays_1(self):
        # Both distograms hold the same residue pairs, but with different binning.
        bins_5 = ((0, 4), (4, 6), (6, 8), (8, 10), (10, np.inf))
        bins_4 = ((0, 4), (4, 6), (6, 8), (8, np.inf))

        distogram_1 = Distogram("test_1")
        for res1, res2, scores in (
                (1, 5, (0.25, 0.45, 0.05, 0.05, 0.2)),
                (2, 3, (0.15, 0.15, 0.60, 0.1, 0.0)),
                (1, 4, (0.05, 0.2, 0.0, 0.6, 0.15)),
                (3, 5, (0.4, 0.1, 0.35, 0.05, 0.1)),
        ):
            distogram_1.add(Distance(res1, res2, scores, bins_5))
        distogram_1.sequence = conkit.core.Sequence("test_seq", "AAAAA")

        distogram_2 = Distogram("test_2")
        for res1, res2, scores in (
                (1, 5, (0.45, 0.05, 0.25, 0.25)),
                (2, 3, (0.1, 0.15, 0.15, 0.6)),
                (1, 4, (0.75, 0.20, 0.05, 0.0)),
                (3, 5, (0.05, 0.1, 0.35, 0.5)),
        ):
            distogram_2.add(Distance(res1, res2, scores, bins_4))
        distogram_2.sequence = conkit.core.Sequence("test_seq", "AAAAA")

        merged = Distogram.merge_arrays(distogram_1, distogram_2)
        # Swap infinities for a finite sentinel so the nested lists can be compared
        merged[np.isinf(merged)] = 99999
        merged = np.nan_to_num(merged).tolist()

        expected = [
            [0.0, 0.0, 0.0, 9.0, 5.0],
            [0.0, 0.0, 7.0, 0.0, 0.0],
            [0.0, 99999, 0.0, 0.0, 2.0],
            [2.0, 0.0, 0.0, 0.0, 0.0],
            [2.0, 0.0, 99999, 0.0, 0.0],
        ]

        self.assertListEqual(merged, expected)
Example #9
0
    def read(self, f_handle, f_id="rosettanpz"):
        """Read a distance prediction file

        The ``dist`` array stored in the file is loaded and one
        :obj:`~conkit.core.distance.Distance` is created for every residue pair ``(i, j)``
        with ``i <= j``; residue numbers are 1-based.

        Parameters
        ----------
        f_handle
           Open file handle [read permissions]
        f_id : str, optional
           Unique contact file identifier

        Returns
        -------
        :obj:`~conkit.core.distancefile.DistanceFile`

        """

        hierarchy = DistanceFile(f_id)
        hierarchy.original_file_format = "ROSETTA_NPZ"
        _map = Distogram("distogram_1")
        hierarchy.add(_map)

        # NOTE(review): allow_pickle=True permits numpy to unpickle object arrays, which can
        # execute arbitrary code if the file comes from an untrusted source.
        prediction = np.load(f_handle, allow_pickle=True)
        probs = prediction['dist']
        # Bin #0 corresponds with d>20A & bins #1 ~ #36 correspond with 2A<d<20A in increments of 0.5A.
        # Move bin #0 to the end of the last axis (presumably to match the order of DISTANCE_BINS).
        probs = probs[:, :, list(range(1, 37)) + [0]]

        seq_len = probs.shape[0]
        for i in range(seq_len):
            for j in range(i, seq_len):
                scores = tuple(probs[i, j, :].tolist())
                _map.add(Distance(i + 1, j + 1, scores, DISTANCE_BINS))

        return hierarchy
Example #10
0
 def test_reshape_bins_1(self):
     # Every distance in the distogram is re-shaped from four bins onto three.
     old_bins = ((0, 4), (4, 6), (6, 8), (8, np.inf))
     distogram = Distogram("test")
     for res1, res2, scores in (
             (1, 5, (0.25, 0.45, 0.25, 0.05)),
             (2, 3, (0.15, 0.15, 0.60, 0.1)),
             (1, 4, (0.05, 0.25, 0.70, 0.0)),
             (3, 5, (0.5, 0.1, 0.35, 0.05)),
     ):
         distogram.add(Distance(res1, res2, scores, old_bins))
     new_bins = ((0, 2), (2, 8), (8, np.inf))
     distogram.reshape_bins(new_bins)

     observed_distances = [dist.predicted_distance for dist in distogram]
     self.assertListEqual([5.0, 5.0, 5.0, 5.0], observed_distances)

     observed_bins = [dist.distance_bins for dist in distogram]
     self.assertListEqual(observed_bins, [new_bins for dist in distogram])

     observed_within_8 = [dist.get_probability_within_distance(8) for dist in distogram]
     self.assertListEqual(observed_within_8, [0.95, 0.8999999999999999, 1.0, 0.95])

     observed_scores = [dist.distance_scores for dist in distogram]
     self.assertListEqual(observed_scores, [
         (0.125, 0.825, 0.050000000000000044),
         (0.075, 0.825, 0.09999999999999998),
         (0.025, 0.975, 0.0),
         (0.25, 0.7, 0.050000000000000044),
     ])
Example #11
0
    def read(self, f_handle, f_id="casp2"):
        """Read a distance prediction file

        Only lines made of exactly 13 whitespace-separated columns whose first two
        columns are digits are parsed; every other line is skipped. The columns are
        read as: residue 1, residue 2, raw score and ten distance scores.

        Parameters
        ----------
        f_handle
           Open file handle [read permissions]
        f_id : str, optional
           Unique contact file identifier

        Returns
        -------
        :obj:`~conkit.core.distancefile.DistanceFile`

        """

        hierarchy = DistanceFile(f_id)
        hierarchy.original_file_format = "CASPRR_MODE_2"
        _map = Distogram("distogram_1")
        hierarchy.add(_map)

        # Iterate the handle lazily instead of loading every line up front
        for line in f_handle:
            # split() without arguments already discards leading/trailing whitespace
            fields = line.split()
            if len(fields) != 13 or not fields[0].isdigit() or not fields[1].isdigit():
                continue

            res1_seq = int(fields[0])
            res2_seq = int(fields[1])
            raw_score = float(fields[2])
            distance_scores = tuple(float(p) for p in fields[3:])
            _distance = Distance(res1_seq, res2_seq, distance_scores, DISTANCE_BINS, raw_score=raw_score)
            _map.add(_distance)

        return hierarchy
Example #12
0
    def test_as_array_1(self):
        # The array is requested for a sequence length of 5; NaNs are zeroed before comparing.
        bins = ((0, 4), (4, 6), (6, 8), (8, np.inf))
        distogram = Distogram("test")
        for res1, res2, scores in (
                (1, 5, (0.25, 0.45, 0.25, 0.05)),
                (2, 3, (0.15, 0.15, 0.60, 0.1)),
                (1, 4, (0.05, 0.25, 0.70, 0.0)),
                (3, 5, (0.5, 0.1, 0.35, 0.05)),
        ):
            distogram.add(Distance(res1, res2, scores, bins))

        raw_array = distogram.as_array(seq_len=5)
        output = np.nan_to_num(raw_array).tolist()

        expected = [
            [0., 0., 0., 7.0, 5.0],
            [0., 0., 7.0, 0., 0.],
            [0., 7.0, 0., 0., 2.0],
            [7.0, 0., 0., 0., 0.],
            [5.0, 0., 2.0, 0., 0.],
        ]

        self.assertListEqual(output, expected)
Example #13
0
    def test_write_1(self):
        expected_output = """PFRMAT RR
RMODE 2
1 6 0.199696 0.043889 0.085795 0.070011 0.071518 0.054028 0.213284 0.069087 0.097959 0.090083 0.204345
1 7 0.233644 0.049411 0.075135 0.109098 0.150810 0.096584 0.092398 0.096662 0.093350 0.123176 0.113375
1 8 0.246451 0.106886 0.039024 0.100540 0.082028 0.108344 0.078788 0.105980 0.130109 0.113708 0.134592
1 9 0.267139 0.072002 0.083053 0.112084 0.124356 0.128044 0.097491 0.132106 0.047198 0.110915 0.092751
1 10 0.351914 0.081445 0.069721 0.200748 0.099755 0.090368 0.117449 0.127677 0.050879 0.101965 0.059993
2 7 0.228459 0.085973 0.091366 0.051120 0.085890 0.070657 0.119253 0.082744 0.180051 0.097734 0.135213
2 8 0.256177 0.081094 0.077748 0.097335 0.060811 0.138077 0.130496 0.106911 0.101101 0.121346 0.085081
2 9 0.216631 0.046454 0.053018 0.117160 0.196036 0.144154 0.125199 0.090720 0.052621 0.098583 0.076055
2 10 0.284653 0.087567 0.125308 0.071778 0.071988 0.095966 0.099270 0.174715 0.109563 0.062611 0.101233
3 8 0.345583 0.117500 0.110134 0.117950 0.085312 0.098812 0.072826 0.079326 0.196758 0.059058 0.062325
3 9 0.203586 0.036574 0.050725 0.116287 0.174339 0.070881 0.116388 0.083683 0.060738 0.160257 0.130128
3 10 0.293849 0.059364 0.135117 0.099368 0.113124 0.135930 0.066876 0.075962 0.114771 0.127034 0.072454
4 9 0.234649 0.077170 0.048841 0.108638 0.107559 0.119732 0.116349 0.077063 0.111788 0.119497 0.113362
4 10 0.322930 0.090789 0.133412 0.098729 0.099123 0.084633 0.107534 0.137072 0.096560 0.042234 0.109913
5 10 0.279782 0.054314 0.114427 0.111042 0.069073 0.083048 0.105829 0.073806 0.119769 0.088666 0.180028"""

        distancefile = DistanceFile("test")
        distancefile.original_file_format = 'ALPHAFOLD2'
        distogram = Distogram("1")
        distancefile.add(distogram)

        list_res1 = [1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 5]
        list_res2 = [6, 7, 8, 9, 10, 7, 8, 9, 10, 8, 9, 10, 9, 10, 10]
        bin_edges = (2.3125, 2.625, 2.9375, 3.25, 3.5625, 3.875, 4.1875, 4.5,
                     4.8125, 5.125, 5.4375, 5.75, 6.0625, 6.375, 6.6875,
                     6.9999995, 7.3125, 7.625, 7.9375, 8.25, 8.5625, 8.875,
                     9.1875, 9.5, 9.812499, 10.124999, 10.4375, 10.75, 11.0625,
                     11.375, 11.687499, 12., 12.3125, 12.625, 12.9375, 13.25,
                     13.5625, 13.874999, 14.187501, 14.499999, 14.812499,
                     15.124999, 15.437499, 15.75, 16.0625, 16.375, 16.687502,
                     16.999998, 17.312498, 17.624998, 17.937498, 18.25,
                     18.5625, 18.875, 19.1875, 19.5, 19.8125, 20.125,
                     20.437498, 20.75, 21.062498, 21.374998, 21.6875)
        distance_bins = [(0, bin_edges[0])]
        distance_bins += [(bin_edges[idx], bin_edges[idx + 1])
                          for idx in range(len(bin_edges) - 1)]
        distance_bins.append((bin_edges[-1], np.inf))
        distance_bins = tuple(distance_bins)

        np.random.seed(41)
        for res_1, res_2 in zip(list_res1, list_res2):
            distance_scores = np.random.dirichlet(np.ones(64)).tolist()
            distance = Distance(res_1, res_2, distance_scores, distance_bins)
            distogram.add(distance)

        f_name = self.tempfile()
        with open(f_name, "w") as f_out:
            CaspMode2Parser().write(f_out, distogram)

        with open(f_name, "r") as f_in:
            output = f_in.read().splitlines()

        self.assertListEqual(expected_output.split('\n'), output)
Example #14
0
 def test_get_probability_within_distance_1(self):
     # Probabilities are checked for several cutoffs; a negative cutoff must raise.
     scores = (0.15, 0.45, 0.25, 0.05, 0.1)
     bins = ((0, 4), (4, 6), (6, 8), (8, 10), (10, np.inf))
     distance = Distance(1, 25, scores, bins)
     self.assertEqual(distance.raw_score, 0.85)
     expectations = (
         (5, 0.375),
         (8, 0.85),
         (10, 0.9),
         (25, 0.999999969409768),
         (np.inf, 1),
         (0, 0),
     )
     for cutoff, probability in expectations:
         self.assertEqual(distance.get_probability_within_distance(cutoff), probability)
     with self.assertRaises(ValueError):
         distance.get_probability_within_distance(-5)
Example #15
0
    def test_calculate_rmsd_3(self):
        # distogram_2 carries one extra distance, (1, 6), that points past the
        # five-residue sequence; the RMSD calculation is expected to raise.
        bins_5 = ((0, 4), (4, 6), (6, 8), (8, 10), (10, np.inf))
        bins_4 = ((0, 4), (4, 6), (6, 8), (8, np.inf))

        distogram_1 = Distogram("test_1")
        for res1, res2, scores in (
                (1, 5, (0.25, 0.45, 0.05, 0.05, 0.2)),
                (2, 3, (0.15, 0.15, 0.60, 0.1, 0.0)),
                (1, 4, (0.05, 0.2, 0.0, 0.6, 0.15)),
                (3, 5, (0.4, 0.1, 0.35, 0.05, 0.1)),
        ):
            distogram_1.add(Distance(res1, res2, scores, bins_5))
        distogram_1.sequence = conkit.core.Sequence("test_seq", "AAAAA")

        distogram_2 = Distogram("test_2")
        for res1, res2, scores in (
                (1, 5, (0.45, 0.05, 0.25, 0.25)),
                (2, 3, (0.1, 0.15, 0.15, 0.6)),
                (1, 4, (0.75, 0.20, 0.05, 0.0)),
                (3, 5, (0.05, 0.1, 0.35, 0.5)),
                (1, 6, (0.5, 0.1, 0.35, 0.05)),
        ):
            distogram_2.add(Distance(res1, res2, scores, bins_4))
        distogram_2.sequence = conkit.core.Sequence("test_seq", "AAAAA")

        with self.assertRaises(ValueError):
            Distogram.calculate_rmsd(distogram_1, distogram_2, seq_len=5, calculate_wrmsd=True)
Example #16
0
    def reshape_bins(self, new_bins):
        """Re-shape the distance bins of every :obj:`~conkit.core.distance.Distance` in this instance.

        Each contained distance has its :attr:`~conkit.core.distance.Distance.distance_scores` and
        :attr:`~conkit.core.distance.Distance.distance_bins` updated to fit the new bins.

        Parameters
        ----------
        new_bins : tuple
           A tuple of tuples, where each element corresponds with the upper and lower edges of the intervals for
           the new distance bins

        Raises
        ------
        :exc:`ValueError`
           The original file format is ``'pdb'``
        :exc:`ValueError`
           The new distance bins are not valid
        """
        came_from_pdb = self.original_file_format == 'pdb'
        if came_from_pdb:
            raise ValueError('Cannot re-shape bins obtained from a PDB structure file')

        # Validate once up front, before any distance is modified
        Distance._assert_valid_bins(new_bins)

        for member in self:
            member._reshape_bins(new_bins)
Example #17
0
    def test_calculate_rmsd_2(self):
        # Weighted RMSD between two distograms that use different binning.
        bins_5 = ((0, 4), (4, 6), (6, 8), (8, 10), (10, np.inf))
        bins_4 = ((0, 4), (4, 6), (6, 8), (8, np.inf))

        distogram_1 = Distogram("test_1")
        for res1, res2, scores in (
                (1, 5, (0.25, 0.45, 0.05, 0.05, 0.2)),
                (2, 3, (0.15, 0.15, 0.60, 0.1, 0.0)),
                (1, 4, (0.05, 0.2, 0.0, 0.6, 0.15)),
                (3, 5, (0.4, 0.1, 0.35, 0.05, 0.1)),
        ):
            distogram_1.add(Distance(res1, res2, scores, bins_5))
        distogram_1.sequence = conkit.core.Sequence("test_seq", "AAAAA")

        distogram_2 = Distogram("test_2")
        for res1, res2, scores in (
                (1, 5, (0.45, 0.05, 0.25, 0.25)),
                (2, 3, (0.1, 0.15, 0.15, 0.6)),
                (1, 4, (0.75, 0.20, 0.05, 0.0)),
                (3, 5, (0.05, 0.1, 0.35, 0.5)),
        ):
            distogram_2.add(Distance(res1, res2, scores, bins_4))
        distogram_2.sequence = conkit.core.Sequence("test_seq", "AAAAA")

        rmsd = Distogram.calculate_rmsd(distogram_1, distogram_2, seq_len=5, calculate_wrmsd=True)
        rounded = [round(x, 3) for x in rmsd]

        self.assertListEqual([4.09, 2.324, 3.937, 5.422, 3.85], rounded)
Example #18
0
    def test_write_1(self):
        expected_output = """#REMARK MapPred 1.1
#REMARK idx_i, idx_j, distance distribution of 34 bins
#REMARK 34 bins consist of 32 normal bins (4-20A with a step of 0.5A) and two boundary bins ( [0,4) and [20, inf) ), as follows: [0,4,4.5,5,5.5,6,6.5,7,7.5,8,8.5,9,9.5,10,10.5,11,11.5,12,12.5,13,13.5,14,14.5,15,15.5,16,16.5,17,17.5,18,18.5,19,19.5,20,inf]
5 10 0.013746 0.002245 0.053742 0.002115 0.005889 0.044058 0.010081 0.052535 0.118677 0.025818 0.019215 0.015831 0.009808 0.018148 0.031220 0.003428 0.058081 0.017978 0.065069 0.024163 0.044585 0.062025 0.026062 0.023824 0.012573 0.027729 0.022212 0.041685 0.005015 0.064340 0.004133 0.006420 0.018552 0.048998
1 35 0.187103 0.008180 0.021642 0.051089 0.038619 0.006100 0.010553 0.031697 0.010831 0.015310 0.006949 0.008237 0.043400 0.051436 0.003820 0.008148 0.018467 0.057307 0.022873 0.029184 0.008235 0.008025 0.004214 0.027027 0.070948 0.028355 0.049284 0.060124 0.041885 0.043900 0.000681 0.006836 0.007679 0.011862
43 85 0.024968 0.014838 0.021987 0.031265 0.019144 0.033038 0.018177 0.008716 0.017331 0.046459 0.051147 0.043912 0.004041 0.007990 0.027690 0.073997 0.001269 0.008161 0.067709 0.055700 0.028615 0.091884 0.021842 0.025949 0.025295 0.006136 0.031655 0.028990 0.082802 0.005069 0.002322 0.015611 0.039637 0.016654
85 43 0.015871 0.013765 0.006593 0.014670 0.029273 0.042705 0.058513 0.014858 0.050493 0.014216 0.010146 0.037020 0.018679 0.003142 0.031215 0.011736 0.008920 0.007325 0.144325 0.003512 0.018591 0.005043 0.001607 0.043659 0.068744 0.052532 0.050643 0.039295 0.003413 0.035119 0.102032 0.004150 0.005737 0.032456
50 50 0.000490 0.027392 0.001090 0.009625 0.011421 0.002011 0.015100 0.018622 0.008785 0.114531 0.044962 0.019562 0.022973 0.008111 0.042691 0.061367 0.001060 0.032753 0.073944 0.006790 0.002509 0.073759 0.025060 0.031361 0.039123 0.043318 0.032752 0.004280 0.044655 0.000556 0.000111 0.095043 0.028036 0.056157
18 50 0.002704 0.015000 0.024442 0.105520 0.014259 0.027628 0.002832 0.035063 0.038354 0.055931 0.039683 0.035546 0.004621 0.019932 0.012316 0.087781 0.006637 0.043857 0.008459 0.053482 0.016937 0.083507 0.031733 0.000793 0.004304 0.066937 0.009968 0.006859 0.038950 0.064003 0.003185 0.008042 0.007331 0.023401"""
        distancefile = DistanceFile("test")
        distancefile.original_file_format = 'MAPPRED'
        distogram = Distogram("1")
        distancefile.add(distogram)

        list_res1 = [5, 1, 43, 85, 50, 18]
        list_res2 = [10, 35, 85, 43, 50, 50]
        distance_bins = ((0, 4), (4, 4.5), (4.5, 5), (5, 5.5), (5.5, 6),
                         (6, 6.5), (6.5, 7), (7, 7.5), (7.5, 8), (8, 8.5),
                         (8.5, 9), (9, 9.5), (9.5, 10), (10, 10.5), (10.5, 11),
                         (11, 11.5), (11.5, 12), (12, 12.5), (12.5, 13),
                         (13, 13.5), (13.5, 14), (14, 14.5), (14.5, 15),
                         (15, 15.5), (15.5, 16), (16, 16.5), (16.5, 17),
                         (17, 17.5), (17.5, 18), (18, 18.5), (18.5, 19),
                         (19, 19.5), (19.5, 20), (20, np.inf))

        np.random.seed(41)
        for res_1, res_2 in zip(list_res1, list_res2):
            distance_scores = np.random.dirichlet(np.ones(34)).tolist()
            distance = Distance(res_1, res_2, distance_scores, distance_bins)
            distogram.add(distance)

        f_name = self.tempfile()
        with open(f_name, "w") as f_out:
            MapPredParser().write(f_out, distogram)

        with open(f_name, "r") as f_in:
            output = f_in.read().splitlines()

        self.assertListEqual(expected_output.split("\n"), output)
Example #19
0
    def read(self, f_handle, f_id="alphafold2"):
        """Read a distance prediction file

        The ``distogram`` entry of the prediction provides ``logits`` and ``bin_edges``.
        The logits are turned into probabilities with a softmax over the last axis, and one
        :obj:`~conkit.core.distance.Distance` is created for every residue pair ``(i, j)``
        with ``i <= j``; residue numbers are 1-based.

        Parameters
        ----------
        f_handle
           Open file handle [read permissions]
        f_id : str, optional
           Unique contact file identifier

        Returns
        -------
        :obj:`~conkit.core.distancefile.DistanceFile`

        """

        hierarchy = DistanceFile(f_id)
        hierarchy.original_file_format = "alphafold2"
        _map = Distogram("distogram_1")
        hierarchy.add(_map)

        # NOTE(review): allow_pickle=True makes numpy unpickle the file contents, which can
        # execute arbitrary code if the file comes from an untrusted source.
        prediction = np.load(f_handle, allow_pickle=True)
        predicted_distogram = prediction['distogram']
        probs = softmax(predicted_distogram['logits'], axis=-1)
        bin_edges = list(predicted_distogram['bin_edges'])

        # Consecutive edges delimit the bins: the first bin starts at 0 and the last one is open-ended
        distance_bins = tuple(zip([0] + bin_edges, bin_edges + [np.inf]))

        seq_len = probs.shape[0]
        for i in range(seq_len):
            for j in range(i, seq_len):
                scores = tuple(probs[i, j, :].tolist())
                _map.add(Distance(i + 1, j + 1, scores, distance_bins))

        return hierarchy
Example #20
0
 def test_get_absent_residues_2(self):
     # Residues 1, 2, 3 and 5 appear in a distance; the sequence has 7 residues.
     bins = ((0, 4), (4, 6), (6, 8), (8, np.inf))
     distogram = Distogram("test")
     for res1, res2, scores in (
             (1, 5, (0.25, 0.45, 0.25, 0.05)),
             (2, 3, (0.15, 0.15, 0.60, 0.1)),
     ):
         distogram.add(Distance(res1, res2, scores, bins))
     distogram.sequence = Sequence('test', 'AAAAAAA')
     absent = distogram.get_absent_residues()
     self.assertListEqual([4, 6, 7], absent)
Example #21
0
 def test_predicted_distance_2(self):
     # With a single bin whose edges coincide, the predicted distance is that value.
     pdb_distogram = Distogram('test')
     pdb_distogram.original_file_format = 'pdb'
     observed = Distance(36, 86, (1, ), ((6.589181, 6.589181), ), 0.934108)
     pdb_distogram.add(observed)
     self.assertEqual(6.589181, observed.predicted_distance)
Example #22
0
 def test_predicted_distance_3(self):
     # Bins (4, 6) and (6, 8) tie on the top score of 0.3; (4, 6) is expected.
     scores = (0.2, 0.3, 0.3, 0.2)
     bins = ((0, 4), (4, 6), (6, 8), (8, np.inf))
     distance = Distance(2, 3, scores, bins)
     self.assertEqual(distance.max_score, 0.3)
     self.assertTupleEqual(distance.predicted_distance_bin, (4, 6))
     self.assertEqual(distance.predicted_distance, 5)
Example #23
0
 def test_ndistances_1(self):
     # Two distances are added, so two must be counted.
     bins = ((0, 4), (4, 6), (6, 8), (8, np.inf))
     distogram = Distogram("test")
     for res1, res2, scores in (
             (1, 25, (0.25, 0.45, 0.25, 0.05)),
             (7, 19, (0.15, 0.15, 0.60, 0.1)),
     ):
         distogram.add(Distance(res1, res2, scores, bins))
     self.assertEqual(2, distogram.ndistances)
Example #24
0
 def test_original_file_format_setter_2(self):
     # Assigning the format 'mock_format' must be rejected with a ValueError.
     bins = ((0, 4), (4, 6), (6, 8), (8, np.inf))
     distogram = Distogram("test")
     distogram.add(Distance(1, 25, (0.25, 0.45, 0.25, 0.05), bins))
     with self.assertRaises(ValueError):
         distogram.original_file_format = 'mock_format'
Example #25
0
 def test_predicted_distance_1(self):
     # The (4, 6) bin holds the top score of 0.45; 5 is expected as the distance.
     scores = (0.15, 0.45, 0.25, 0.05, 0.1)
     bins = ((0, 4), (4, 6), (6, 8), (8, 10), (10, np.inf))
     distance = Distance(1, 25, scores, bins)
     self.assertEqual(distance.max_score, 0.45)
     self.assertTupleEqual(distance.predicted_distance_bin, (4, 6))
     self.assertEqual(distance.predicted_distance, 5)
Example #26
0
 def test_original_file_format_setter_1(self):
     # No assertion: the test passes as long as assigning 'mmcif' does not raise.
     bins = ((0, 4), (4, 6), (6, 8), (8, np.inf))
     distogram = Distogram("test")
     distogram.add(Distance(1, 25, (0.25, 0.45, 0.25, 0.05), bins))
     distogram.original_file_format = 'mmcif'
Example #27
0
 def test__assert_valid_bins_5(self):
     # The candidate bins leave gaps (20 -> 25 and 31 -> 45) and must be rejected.
     scores = (0.15, 0.45, 0.25, 0.05, 0.1)
     bins = ((0, 4), (4, 6), (6, 8), (8, 10), (10, np.inf))
     distance = Distance(1, 25, scores, bins)
     gapped_bins = ((0, 1), (1, 10), (10, 20), (25, 30), (30, 31), (45, np.inf))
     with self.assertRaises(ValueError):
         distance._assert_valid_bins(gapped_bins)
Example #28
0
    def _read(self, structure, f_id, distance_cutoff, atom_type):
        """Read the inter-residue distances of a structure

        One :obj:`~conkit.core.distancefile.DistanceFile` is built per model, holding one
        distogram per ordered pair of chains that yields at least one distance. Only the
        hierarchy of the first model is returned; a :exc:`FutureWarning` is issued when
        the structure holds more than one model.

        Parameters
        ----------
        structure
           A :obj:`~Bio.PDB.Structure.Structure` instance
        f_id : str
           Unique contact file identifier
        distance_cutoff : int
           Distance cutoff for which to determine contacts
        atom_type : str
           Atom type between which distances are calculated

        Returns
        -------
        :obj:`~conkit.core.distancefile.DistanceFile`

        """
        hierarchies = []
        distance_bound = (0.0, float(distance_cutoff))
        for model in structure:
            hierarchy = DistanceFile(f_id + "_" + str(model.id))
            hierarchy.original_file_format = "PDB"
            chains = list(model)

            for chain in chains:
                self._remove_hetatm(chain)
                self._remove_atom(chain, atom_type)

            for chain1, chain2 in itertools.product(chains, chains):
                # Decide intra vs inter from the chains themselves rather than from the
                # length of the distogram id, which is only reliable for 1-character chain ids
                is_intra = chain1.id == chain2.id
                distogram = Distogram(chain1.id if is_intra else chain1.id + chain2.id)

                for atom1, atom2, distance in self._chain_contacts(chain1, chain2):
                    within_cutoff = distance < distance_cutoff
                    score = round(1.0 - (distance / 100), 6) if within_cutoff else 0

                    # The measured distance is stored as a single bin whose edges coincide
                    dist = Distance(atom1.resseq, atom2.resseq, (1, ),
                                    ((distance, distance), ), score,
                                    distance_bound)
                    dist.res1_altseq = atom1.resseq_alt
                    dist.res2_altseq = atom2.resseq_alt
                    dist.res1 = atom1.resname
                    dist.res2 = atom2.resname
                    dist.res1_chain = atom1.reschain
                    dist.res2_chain = atom2.reschain

                    # A cutoff of 0 marks every distance as a true positive
                    if distance_cutoff == 0 or within_cutoff:
                        dist.true_positive = True

                    distogram.add(dist)

                if distogram.empty:
                    continue

                if is_intra:
                    distogram.sequence = self._build_sequence(chain1)
                    assert len(distogram.sequence.seq) == len(chain1)
                else:
                    distogram.sequence = self._build_sequence(
                        chain1) + self._build_sequence(chain2)
                    assert len(distogram.sequence.seq
                               ) == len(chain1) + len(chain2)
                hierarchy.add(distogram)

            hierarchy.method = "Distogram extracted from PDB " + str(model.id)
            hierarchy.remark = [
                "The model id is the chain identifier, i.e XY equates to chain X and chain Y.",
                "Residue numbers in column 1 are chain X, and numbers in column 2 are chain Y.",
            ]
            hierarchies.append(hierarchy)

        if len(hierarchies) > 1:
            msg = "Super-level to contact file not yet implemented. Parser returns hierarchy for top model only!"
            warnings.warn(msg, FutureWarning)
        return hierarchies[0]
Example #29
0
def Distance(*args, **kwargs):
    """Build a :obj:`Distance <conkit.core.distance.Distance>` instance

    The class is imported when this function is called, and every positional and
    keyword argument is forwarded unchanged to its constructor.
    """
    from conkit.core.distance import Distance as _DistanceClass

    return _DistanceClass(*args, **kwargs)