def test_DSSP_hbonds(self):
    """Check that every hydrogen-bond partner index refers to a parsed residue."""
    dssp, _keys = make_dssp_dict("PDB/2BEG.dssp")

    # In the "N-H-->O O-->H-N N-H-->O O-->H-N" DSSP output columns, the
    # integer in front of each hydrogen bond energy (kcal/mol) is a DSSP
    # index *relative* to the current residue. Adding it to the residue's
    # own DSSP index (field 5) yields the absolute index of the
    # (provisional) h-bond partner. Real h-bonds are usually selected
    # afterwards with an energy threshold, which is not applied here.
    known_indices = set()
    partner_indices = set()
    for record in dssp.values():
        own_index = record[5]
        known_indices.add(own_index)
        for relative_slot in (6, 8, 10, 12):
            partner_indices.add(own_index + record[relative_slot])

    # Every partner index must be among the indices that were parsed.
    self.assertEqual((known_indices & partner_indices), partner_indices)
def test_DSSP_noheader_file(self):
    """Check that a pregenerated DSSP file without header information parses."""
    # New DSSP prints a line containing only whitespace and "."
    headerless_path = "PDB/2BEG_noheader.dssp"
    parsed, _keys = make_dssp_dict(headerless_path)
    self.assertEqual(len(parsed), 130)
def test_DSSP_file(self):
    """Check that a pregenerated DSSP file yields the expected entry count."""
    # NOTE(review): this file contains a second definition named
    # test_DSSP_file; if both live in the same class only the later one is
    # collected - confirm whether one of them should be removed.
    dssp_path = "PDB/2BEG.dssp"
    parsed, _keys = make_dssp_dict(dssp_path)
    self.assertEqual(len(parsed), 130)
def test_DSSP_file(self):
    """Check that parsing the pregenerated DSSP file gives 130 entries."""
    # NOTE(review): this file contains another definition named
    # test_DSSP_file; if both live in the same class they shadow each
    # other - confirm whether one of them should be removed.
    parse_result = make_dssp_dict("PDB/2BEG.dssp")
    residue_map = parse_result[0]
    self.assertEqual(len(residue_map), 130)
# Record attached to atoms whose residue has no DSSP entry: two placeholder
# strings followed by twelve zeroed numeric fields.
_MISSING_DSSP_FEATURES = ('', '-') + (0.0,) * 12

# Value substituted whenever a depth computation returns None.
_FALLBACK_DEPTH = 5.0


def _lookup_dssp_features(dssp_map, residue):
    """Return the DSSP record for *residue*, trying progressively looser keys.

    Lookup order:
      1. the residue's own ``full_id[2:]`` key;
      2. the same chain / sequence number with blank first and third id
         fields;
      3. the first DSSP key sharing the chain id and sequence number.
    Falls back to ``_MISSING_DSSP_FEATURES`` when nothing matches.
    """
    key = residue.full_id[2:]
    if key in dssp_map:
        return dssp_map[key]

    chain_id, seq_number = key[0], key[1][1]
    blank_key = (chain_id, (' ', seq_number, ' '))
    if blank_key in dssp_map:
        return dssp_map[blank_key]

    for candidate in dssp_map:
        if candidate[0] == chain_id and candidate[1][1] == seq_number:
            return dssp_map[candidate]
    return _MISSING_DSSP_FEATURES


def _set_neighbour_shell_features(atom, atom_coord, to_surface, ns):
    """Attach per-shell neighbour counts (overall and per element) to *atom*.

    For each radius in ``Constants.NEIGHBOUR_SUM_RADIUS`` the neighbours
    returned by ``ns.search`` are counted, and the count found at the
    previous radius is subtracted so that each feature describes one shell.
    The same is done for the "above plane" counts and for every element in
    ``Constants.NEIGHBOUR_SUM_RADIUS_ATOMS``.

    NOTE(review): shell counts are only meaningful if the radii are listed
    in ascending order - confirm against Constants.
    """
    features = Constants.NODE_APPENDED_FEATURES
    elements = Constants.NEIGHBOUR_SUM_RADIUS_ATOMS

    # Counts observed at the previous (smaller) radius.
    prev_total = 0
    prev_above = 0
    prev_elem_total = [0] * len(elements)
    prev_elem_above = [0] * len(elements)

    for num, radius in enumerate(Constants.NEIGHBOUR_SUM_RADIUS):
        neighbours = ns.search(atom_coord, radius)
        total = len(neighbours)
        above = num_of_atoms_above_plane(to_surface, atom_coord, neighbours)

        setattr(atom,
                features[Constants.neighbour_sum_radius_name(num)],
                total - prev_total)
        setattr(atom,
                features[Constants.neighbour_sum_above_plane_radius_name(num)],
                above - prev_above)
        # Remember this radius' totals (not a running sum of all totals, which
        # would over-subtract from the third radius onward).
        prev_total, prev_above = total, above

        for i, element in enumerate(elements):
            wanted = element.upper()
            same_element = [a for a in neighbours
                            if a.element.upper() == wanted]
            elem_total = len(same_element)
            elem_above = num_of_atoms_above_plane(to_surface, atom_coord,
                                                  same_element)

            setattr(atom,
                    features[Constants.neighbour_sum_radius_name(
                        num, element)],
                    elem_total - prev_elem_total[i])
            setattr(atom,
                    features[Constants.neighbour_sum_above_plane_radius_name(
                        num, element)],
                    elem_above - prev_elem_above[i])
            prev_elem_total[i] = elem_total
            prev_elem_above[i] = elem_above


def generate_node_features(protein_chains, surface, ns: NeighborSearch,
                           only_ca=Constants.GET_ONLY_CA_ATOMS):
    """Annotate atoms of *protein_chains* with structural node features.

    The function works purely by side effect: every processed atom receives
    attributes named by ``Constants.NODE_APPENDED_FEATURES`` and
    ``Constants.DSSP_FEATURES_NAME``. Nothing is returned.

    Features set per atom:
      * names of the previous / next residue in the chain
        (``Constants.EMPTY_STR_FEATURE`` at chain ends);
      * residue, atom and CA depth (``_FALLBACK_DEPTH`` when a depth is None;
        a message is printed in that case);
      * distance to the residue's CA atom and CA/CB/atom-to-surface angles
        (angles stay 0 when the residue has no CB, or when the atom shares
        its coordinates with CA);
      * the residue's DSSP record, read from
        ``Constants.DSSP_PATH/<pdb_id>.dssp`` where ``pdb_id`` is the last
        four characters of the structure id of the first chain's parent;
      * per-shell neighbour counts for each radius in
        ``Constants.NEIGHBOUR_SUM_RADIUS``, overall and per element.

    Args:
        protein_chains: non-empty sequence of chains exposing
            ``get_parent()`` and ``get_residues()``.
        surface: indexable collection of surface points; indexed with the
            indices returned by ``min_dist`` / ``residue_depth``
            (presumably an (n, 3) coordinate array - TODO confirm).
        ns: neighbour search object providing ``search(coord, radius)``.
        only_ca: when true, only the CA atom of each residue is annotated.

    Raises:
        KeyError: if a residue has no ``'CA'`` atom.

    Behaviour changes relative to the previous revision:
      * neighbour shell counts subtract the previous radius' count instead of
        the sum of all previous counts (values differ only from the third
        radius onward);
      * chains without residues are skipped instead of raising
        ``StopIteration``.
    """
    pdb_id = protein_chains[0].get_parent().full_id[0][-4:]
    dssp_path = os.path.join(Constants.DSSP_PATH, pdb_id + '.dssp')
    # make_dssp_dict returns a pair; only the residue mapping is needed here.
    dssp_map = make_dssp_dict(dssp_path)[0]
    features = Constants.NODE_APPENDED_FEATURES

    for chain in protein_chains:
        residues = chain.get_residues()
        # Sliding window (previous, current, next); None marks a chain end.
        # Defaults on both next() calls let an empty chain fall straight
        # through the loop below.
        window = deque([None, next(residues, None), next(residues, None)])

        while window[1] is not None:
            prev_res = window.popleft()
            res, next_res = window[0], window[1]

            prev_res_name = (Constants.EMPTY_STR_FEATURE
                             if prev_res is None else prev_res.resname)
            next_res_name = (Constants.EMPTY_STR_FEATURE
                             if next_res is None else next_res.resname)

            dssp_features = _lookup_dssp_features(dssp_map, res)

            ca_atom = res['CA']
            ca_coord = ca_atom.get_coord()
            ca_vec = ca_atom.get_vector()
            ca_d, ca_surf_idx = min_dist(ca_coord, surface)

            cb_ca_surf_angle = 0
            ca_cb_surf_angle = 0
            if 'CB' in res:
                cb_atom = res['CB']
                cb_vec = cb_atom.get_vector()
                _, cb_surf_idx = min_dist(cb_atom.get_coord(), surface)
                cb_ca_surf_angle = calc_angle(
                    cb_vec, ca_vec, Vector(surface[ca_surf_idx]))
                ca_cb_surf_angle = calc_angle(
                    ca_vec, cb_vec, Vector(surface[cb_surf_idx]))

            res_d, dist_list = residue_depth(res, surface)
            if res_d is None:
                res_d = _FALLBACK_DEPTH
                print("Nan values!!!")
            if ca_d is None:
                ca_d = _FALLBACK_DEPTH
                print("Nan values!!!")

            for idx, atom in enumerate(res.get_atoms()):
                if only_ca:
                    # NOTE(review): idx is 0 here, so dist_list[0] is paired
                    # with the CA atom even if the first enumerated atom is
                    # not CA - confirm this is intended.
                    atom = ca_atom

                atom_d, s_idx = dist_list[idx]
                atom_coord = atom.get_coord()
                offset = atom_coord - ca_coord
                ca_atom_dist = np.sqrt(np.sum(offset * offset))

                atom_ca_surf_angle = 0
                ca_atom_surf_angle = 0
                # Angles are undefined when the atom coincides with CA.
                if not np.array_equal(atom_coord, ca_coord):
                    atom_vec = atom.get_vector()
                    surf_vec = Vector(surface[s_idx])
                    atom_ca_surf_angle = calc_angle(atom_vec, ca_vec, surf_vec)
                    ca_atom_surf_angle = calc_angle(ca_vec, atom_vec, surf_vec)

                if atom_d is None:
                    atom_d = _FALLBACK_DEPTH
                    print(f"Nan valuess!! {atom_d}, {atom}")

                appended = {
                    'prev_res_name': prev_res_name,
                    'next_res_name': next_res_name,
                    'residue_depth': res_d,
                    'atom_depth': atom_d,
                    'ca_depth': ca_d,
                    'ca_atom_dist': ca_atom_dist,
                    'cb_ca_surf_angle': cb_ca_surf_angle,
                    'ca_cb_surf_angle': ca_cb_surf_angle,
                    'atom_ca_surf_angle': atom_ca_surf_angle,
                    'ca_atom_surf_angle': ca_atom_surf_angle,
                }
                for feature_key, value in appended.items():
                    setattr(atom, features[feature_key], value)
                setattr(atom, Constants.DSSP_FEATURES_NAME, dssp_features)

                _set_neighbour_shell_features(
                    atom, atom_coord, surface[s_idx] - atom_coord, ns)

                if only_ca:
                    break

            window.append(next(residues, None))