예제 #1
0
    def test_DSSP_hbonds(self):
        """Check that every parsed h-bond partner index is a known DSSP index."""
        records, _ = make_dssp_dict("PDB/2BEG.dssp")

        # Field 5 of each record is the residue's own DSSP index.
        dssp_indices = set()
        for record in records.values():
            dssp_indices.add(record[5])

        # Fields 6, 8, 10 and 12 are the integers that precede each hydrogen
        # bond energy (kcal/mol) in the
        # "N-H-->O    O-->H-N    N-H-->O    O-->H-N" columns of the DSSP
        # output. They are offsets relative to the residue's own DSSP index,
        # so adding them to field 5 gives the absolute DSSP index of every
        # (provisional) h-bond partner. Whether a bond really exists is
        # normally decided by an energy threshold, which is not applied here.
        hb_indices = set()
        for record in records.values():
            own_index = record[5]
            for offset in (record[6], record[8], record[10], record[12]):
                hb_indices.add(own_index + offset)

        # All partner indices must point at residues that were parsed.
        self.assertEqual((dssp_indices & hb_indices), hb_indices)
예제 #2
0
 def test_DSSP_noheader_file(self):
     """Parse a pregenerated DSSP file whose header information is missing."""
     # Recent DSSP versions emit a line holding nothing but whitespace
     # and a ".".
     residue_records, _ = make_dssp_dict("PDB/2BEG_noheader.dssp")
     record_count = len(residue_records)
     self.assertEqual(record_count, 130)
예제 #3
0
 def test_DSSP_file(self):
     """Parse a pregenerated DSSP file and check the number of records."""
     residue_records, _ = make_dssp_dict("PDB/2BEG.dssp")
     record_count = len(residue_records)
     self.assertEqual(record_count, 130)
예제 #4
0
 def test_DSSP_file(self):
     """Parse a pregenerated DSSP file and check the number of records."""
     parsed_records, _ = make_dssp_dict("PDB/2BEG.dssp")
     n_records = len(parsed_records)
     self.assertEqual(n_records, 130)
def _find_dssp_features(res, dssp_dict):
    """Return the DSSP record stored for ``res``, or a neutral placeholder.

    The lookup tries, in order:

    1. the key ``res.full_id[2:]`` as it is;
    2. the same key with the first and third field of its inner tuple
       replaced by a blank (in Biopython residue ids these are the hetero
       flag and the insertion code);
    3. the first key of ``dssp_dict`` that shares the outer field and the
       middle field of the inner tuple (chain id and sequence number).

    When nothing matches, a 14-field placeholder record is returned.
    """
    key = res.full_id[2:]
    if key in dssp_dict:
        return dssp_dict[key]

    chain_id, seq_number = key[0], key[1][1]
    blank_key = (chain_id, (' ', seq_number, ' '))
    if blank_key in dssp_dict:
        return dssp_dict[blank_key]

    for dssp_key in dssp_dict:
        if dssp_key[0] == chain_id and dssp_key[1][1] == seq_number:
            return dssp_dict[dssp_key]

    # No DSSP entry for this residue: fall back to an empty record instead
    # of failing.
    return ('', '-', 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
            0.0)


def generate_node_features(protein_chains,
                           surface,
                           ns: NeighborSearch,
                           only_ca=Constants.GET_ONLY_CA_ATOMS):
    """Attach DSSP, depth, angle and neighbour-count features to atoms.

    The DSSP records are read from ``<Constants.DSSP_PATH>/<id>.dssp``,
    where ``<id>`` is the last four characters of ``full_id[0]`` of the
    first chain's parent. Every residue of every chain is then visited and
    the following values are stored on its atoms with ``setattr`` (attribute
    names come from ``Constants.NODE_APPENDED_FEATURES`` and
    ``Constants.DSSP_FEATURES_NAME``):

    * names of the previous and next residue in the chain
      (``Constants.EMPTY_STR_FEATURE`` at the chain ends);
    * the residue's DSSP record (a placeholder when none is found);
    * residue depth, CA depth and atom depth relative to ``surface``
      (``5.0`` whenever a helper reports ``None``);
    * distance between the atom and the residue's CA atom;
    * CA/CB/surface and atom/CA/surface angles (``0`` when not defined);
    * for every radius in ``Constants.NEIGHBOUR_SUM_RADIUS``, the number of
      neighbours found in the shell between the previous radius and this
      one, in total and per element of
      ``Constants.NEIGHBOUR_SUM_RADIUS_ATOMS``, plus the same counts
      restricted to atoms reported by ``num_of_atoms_above_plane``.

    Args:
        protein_chains: sequence of chains to annotate; must not be empty.
        surface: indexable collection of surface points, handed to
            ``min_dist`` and ``residue_depth``.
        ns: neighbour-search structure queried with
            ``ns.search(coord, radius)``.
        only_ca: when true, only the ``CA`` atom of each residue is
            annotated.

    Note:
        Every visited residue must contain a ``CA`` atom; the ``res['CA']``
        lookup is not guarded. Chains without residues are skipped.
    """
    pdb_id = protein_chains[0].get_parent().full_id[0]
    pdb_id = pdb_id[-4:]
    dssp = make_dssp_dict(os.path.join(Constants.DSSP_PATH, pdb_id + '.dssp'))
    dssp_dict = dssp[0]
    feature_names = Constants.NODE_APPENDED_FEATURES

    # Wall-clock accumulators for the individual stages. They are only
    # written in this part of the function.
    # NOTE(review): presumably meant for profiling output - confirm whether
    # anything still reads them.
    get_residues_t = dssp_key_t = min_dist_t = residue_depth_t = atom_d_t = settattr_t = 0

    for chain in protein_chains:
        start = time.time()
        residue_generator = chain.get_residues()
        get_residues_t += time.time() - start

        # Sliding window (previous, current, next) over the residues. Both
        # look-ahead reads use a default so that a chain without residues
        # produces an all-None window and is skipped, instead of raising
        # StopIteration.
        last_n_residues = deque(
            [None,
             next(residue_generator, None),
             next(residue_generator, None)])
        while last_n_residues[1] is not None:
            prev_res = last_n_residues.popleft()
            prev_res_name = Constants.EMPTY_STR_FEATURE
            if prev_res is not None:
                prev_res_name = prev_res.resname
            res = last_n_residues[0]

            next_res = last_n_residues[1]
            next_res_name = Constants.EMPTY_STR_FEATURE
            if next_res is not None:
                next_res_name = next_res.resname

            start = time.time()
            dssp_features = _find_dssp_features(res, dssp_dict)
            dssp_key_t += time.time() - start

            start = time.time()
            cb_ca_surf_angle = 0
            ca_cb_surf_angle = 0

            ca_atom = res['CA']
            ca_d, ca_surf_idx = min_dist(ca_atom.get_coord(), surface)
            ca_vec = ca_atom.get_vector()
            # The CA/CB angles stay 0 for residues without a CB atom.
            if 'CB' in res:
                cb_atom = res['CB']
                cb_vec = cb_atom.get_vector()
                _, cb_surf_idx = min_dist(cb_atom.get_coord(), surface)
                cb_ca_surf_angle = calc_angle(cb_vec, ca_vec,
                                              Vector(surface[ca_surf_idx]))
                ca_cb_surf_angle = calc_angle(ca_vec, cb_vec,
                                              Vector(surface[cb_surf_idx]))
            min_dist_t += time.time() - start

            start = time.time()
            res_d, dist_list = residue_depth(res, surface)
            if res_d is None:
                res_d = 5.0
                print("Nan values!!!")

            if ca_d is None:
                ca_d = 5.0
                print("Nan values!!!")
            residue_depth_t += time.time() - start

            ca_atom_coord = ca_atom.get_coord()
            for idx, atom in enumerate(res.get_atoms()):
                if only_ca:
                    atom = ca_atom

                start = time.time()
                # NOTE(review): with only_ca the loop runs once with
                # idx == 0 while `atom` is CA. If dist_list follows the
                # residue's atom order, this reads the first atom's depth
                # and surface index rather than CA's - confirm against
                # residue_depth().
                atom_d, s_idx = dist_list[idx]
                atom_coord = atom.get_coord()

                d = atom_coord - ca_atom_coord
                ca_atom_dist = np.sqrt(np.sum(d * d))
                atom_ca_surf_angle = 0
                ca_atom_surf_angle = 0
                # The angles are left at 0 for the CA atom itself (zero
                # distance to CA).
                if not np.array_equal(atom_coord, ca_atom_coord):
                    atom_ca_surf_angle = calc_angle(atom.get_vector(), ca_vec,
                                                    Vector(surface[s_idx]))
                    ca_atom_surf_angle = calc_angle(ca_vec, atom.get_vector(),
                                                    Vector(surface[s_idx]))

                if atom_d is None:
                    atom_d = 5.0
                    print(f"Nan valuess!! {atom_d}, {atom}")
                atom_d_t += time.time() - start

                start = time.time()
                setattr(atom, feature_names['prev_res_name'], prev_res_name)
                setattr(atom, feature_names['next_res_name'], next_res_name)
                setattr(atom, feature_names['residue_depth'], res_d)
                setattr(atom, feature_names['atom_depth'], atom_d)
                setattr(atom, feature_names['ca_depth'], ca_d)
                setattr(atom, feature_names['ca_atom_dist'], ca_atom_dist)
                setattr(atom, feature_names['cb_ca_surf_angle'],
                        cb_ca_surf_angle)
                setattr(atom, feature_names['ca_cb_surf_angle'],
                        ca_cb_surf_angle)
                setattr(atom, feature_names['atom_ca_surf_angle'],
                        atom_ca_surf_angle)
                setattr(atom, feature_names['ca_atom_surf_angle'],
                        ca_atom_surf_angle)
                setattr(atom, Constants.DSSP_FEATURES_NAME, dssp_features)
                settattr_t += time.time() - start

                # Running totals of what has already been assigned to
                # earlier (smaller) radii. Each stored feature is the count
                # inside the current radius minus that total, i.e. the
                # number of atoms in the shell between two radii.
                # Assumes Constants.NEIGHBOUR_SUM_RADIUS is ascending -
                # TODO confirm.
                cumsum_main = 0
                cumsum_plane = 0
                cumsum_atom_main = [0] * len(
                    Constants.NEIGHBOUR_SUM_RADIUS_ATOMS)
                cumsum_atom_plane = [0] * len(
                    Constants.NEIGHBOUR_SUM_RADIUS_ATOMS)
                for num, radius in enumerate(Constants.NEIGHBOUR_SUM_RADIUS):
                    atoms = ns.search(atom_coord, radius)
                    shell_count = len(atoms) - cumsum_main
                    setattr(
                        atom,
                        feature_names[Constants.neighbour_sum_radius_name(
                            num)], shell_count)

                    num_above_plane = num_of_atoms_above_plane(
                        surface[s_idx] - atom_coord, atom_coord, atoms)
                    shell_above = num_above_plane - cumsum_plane
                    setattr(
                        atom, feature_names[
                            Constants.neighbour_sum_above_plane_radius_name(
                                num)], shell_above)

                    # Advance by the shell counts (not by the totals), so
                    # the running sums equal the counts inside the current
                    # radius and the next shell is not reduced twice.
                    cumsum_main += shell_count
                    cumsum_plane += shell_above

                    for i, atom_element in enumerate(
                            Constants.NEIGHBOUR_SUM_RADIUS_ATOMS):
                        element_upper = atom_element.upper()
                        atoms_one_element = [
                            a for a in atoms
                            if a.element.upper() == element_upper
                        ]
                        element_shell_count = (len(atoms_one_element) -
                                               cumsum_atom_main[i])
                        setattr(
                            atom, feature_names[
                                Constants.neighbour_sum_radius_name(
                                    num, atom_element)], element_shell_count)

                        num_above_plane = num_of_atoms_above_plane(
                            surface[s_idx] - atom_coord, atom_coord,
                            atoms_one_element)
                        element_shell_above = (num_above_plane -
                                               cumsum_atom_plane[i])
                        setattr(
                            atom, feature_names[
                                Constants.
                                neighbour_sum_above_plane_radius_name(
                                    num, atom_element)], element_shell_above)
                        cumsum_atom_main[i] += element_shell_count
                        cumsum_atom_plane[i] += element_shell_above
                if only_ca:
                    break
            last_n_residues.append(next(residue_generator, None))