def calculate_single(pdb_id, cid, seq_marker):
    """Compute the descriptor table for a single motif occurrence.

    Args:
        pdb_id: Identifier handed to ``pdb_interface.get_info_for``.
        cid: Chain identifier forwarded to the range/element/contact helpers.
        seq_marker: First residue sequence number of the motif; anything
            accepted by ``int()``.

    Returns:
        The value produced by ``_add_columns`` for the assembled descriptors.

    Raises:
        ValueError: If ``seq_marker`` cannot be converted with ``int()``.
        Exception: If no data is available for ``pdb_id``, if the residue
            window is not exactly 30 entries starting at ``seq_marker``, or
            if any descriptor step fails. Failures inside the calculation are
            re-raised as a generic ``Exception`` whose message embeds the
            formatted traceback; the original error is chained as
            ``__cause__``.
    """
    seq_marker = int(seq_marker)
    pdb_data = pdb_interface.get_info_for(pdb_id)
    if pdb_data is None:
        raise Exception(f"PDB file download fail for {pdb_id}.")
    ATOM, HETATM, hb = pdb_data
    try:
        dsr_snos = _get_sno_range(ATOM, cid, seq_marker)
        # The window must be complete (30 entries) and must start exactly at
        # the requested marker; anything else is reported as missing ATOM
        # lines. This raise is itself caught and wrapped by the handler below.
        if (dsr_snos is None
                or len(dsr_snos) != 30
                or dsr_snos[0] != seq_marker):
            msg = f"ATOM lines not found in range({seq_marker}, " \
                  f"{seq_marker + 30}) for {pdb_id}:{cid}.<br>"
            raise Exception(msg)
        res, C, CA, N = _from_considered_elements_single(ATOM, dsr_snos, cid)
        pept_bonds = _get_pept_bonds(CA, dsr_snos)
        # For filling descr df
        res_CA = _get_res_CA(res, CA, dsr_snos)

        # Note: CA is rebound to the value returned by the dihedral helper.
        angles, CA = dihedrals.get_descr_dihedrals(C, CA, N, dsr_snos)
        hbond_descr = hbonds.get_descr_hb(hb, ATOM, HETATM, dsr_snos)
        heavy_atom_contacts, hetatom_contacts, hetatom_covalent = \
            contacts.get_contacts(ATOM, HETATM, cid, dsr_snos)

        descr = _assemble_descr(hetatom_contacts, hetatom_covalent,
                                heavy_atom_contacts, angles, hbond_descr,
                                res_CA, pept_bonds)
        full_descr = _add_columns(descr, pdb_id, seq_marker, cid)
    except Exception as e:
        msg = f"Exception caught in descriptor calculation. Traceback: " \
              f"<{traceback.format_exc()}>. Error: <{e}>"
        # Chain explicitly so the original exception object stays reachable
        # to callers and debuggers, not only as text inside the message.
        raise Exception(msg) from e
    return full_descr
def find(matrix_file, num_seqs, pdb_seq_file, output, motif_len=30):
    """Locate motif positions in structure-derived sequences and pickle them.

    Steps, in order:
      1. Run ``conv_interface.run`` and search ``pdb_seq_file`` with its
         output to get candidate (pdb_id, chain) pairs.
      2. Resolve a sequence for every pair, first from the pickled cache at
         ``paths.RCSB_SEQS`` and otherwise from the ATOM records returned by
         ``pdb_interface``; pairs that fail are reported and skipped.
      3. Write the cache back, dump the sequences (sorted by key) as FASTA
         into ``paths.DEBUG``, clean and length-filter that file in place.
      4. Search the cleaned file again and pickle the result to ``output``.

    Args:
        matrix_file: Forwarded to ``conv_interface.run``.
        num_seqs: Forwarded to ``conv_interface.run``.
        pdb_seq_file: Sequence file used for the initial search.
        output: Path that receives the pickled motif positions.
        motif_len: Motif length given to ``conv_interface.run`` and used as
            the cut-off for ``filter_seqs.delete_short_seqs``.

    Returns:
        None. Results are written to disk.
    """
    conv_out = paths.CONV_OUTPUT
    conv_interface.run(matrix_file, motif_len, num_seqs, conv_out)
    raw_hits = search_converters.search_run(conv_out, pdb_seq_file)

    chain_pairs = [
        (pdb_id, cid)
        for pdb_id, values in raw_hits.items()
        for cid in values['cid']
    ]
    print(len(chain_pairs))

    # Start from the on-disk cache when it exists, otherwise from scratch.
    # NOTE(review): pickle.load is only safe if this cache file is trusted.
    seq_cache = dict()
    if os.path.isfile(paths.RCSB_SEQS):
        with open(paths.RCSB_SEQS, 'rb') as file:
            seq_cache = pickle.load(file)

    resolved = dict()
    for i, (pdb_id, cid) in enumerate(chain_pairs):
        if i % 10 == 0:
            print(i)

        # Cache keys are upper-cased; result keys keep the original casing.
        cache_key = (pdb_id.upper(), cid.upper())
        if cache_key in seq_cache:
            resolved[(pdb_id, cid)] = seq_cache[cache_key]
            continue

        try:
            ATOM = pdb_interface.get_info_for(pdb_id)[0]
            chain_atoms = ATOM[ATOM.cid == cid]
            if chain_atoms is None:
                continue
            seq = pdb_interface._extract_seq_from_df(chain_atoms)
            if seq is None:
                continue
        except Exception as e:
            print(f"get_seq_for() fails for pdb_id/cid {pdb_id}/{cid}. "
                  f"Skipping.")
            print(f"Traceback: <{traceback.format_exc()}>")
            print(f"Error_msg: <{e}>")
            continue

        resolved[(pdb_id, cid)] = seq
        seq_cache[cache_key] = seq

    with open(paths.RCSB_SEQS, 'wb') as file:
        pickle.dump(seq_cache, file, -1)

    ordered_entries = sorted(resolved.items())
    fasta_path = os.path.join(paths.DEBUG, "pdb_structure_seqs.txt")
    with open(fasta_path, 'w') as file:
        for (pdb_id, cid), seq in ordered_entries:
            file.write(f">{pdb_id}_{cid}\n")
            file.write(seq + "\n")

    # Both post-processing steps rewrite the FASTA file in place.
    clean_fasta_alphabet.screen(fasta_path, fasta_path)
    filter_seqs.delete_short_seqs(fasta_path, motif_len)

    motif_positions = search_converters.search_run(conv_out, fasta_path)
    with open(output, 'wb') as file:
        pickle.dump(motif_positions, file, -1)
def calculate(motif_pos_map):
    """Compute descriptors for every motif listed in ``motif_pos_map``.

    Args:
        motif_pos_map: Mapping of ``pdb_id`` to a dict with the keys
            ``'sno_markers'`` and ``'cid'``. Each may be a single value or a
            list; when ``'sno_markers'`` is not a list, both values are
            wrapped so one (marker, chain) pair is processed.

    Returns:
        A ``pandas.DataFrame`` holding the descriptors of all motifs that
        were processed successfully, with a fresh integer index. An empty
        DataFrame is returned when nothing succeeded.

    Entries whose PDB data is unavailable, or whose residue range cannot be
    determined, are skipped. A motif that raises during calculation is
    reported on stdout and skipped; the parsed ``.pkl`` file for that
    ``pdb_id`` is then deleted if it is listed in ``paths.PDB_PARSED_SET``
    (presumably so it gets re-parsed on the next run -- confirm).
    """
    # Collect per-motif frames and concatenate once at the end.
    # DataFrame.append was removed in pandas 2.0 and was quadratic anyway.
    frames = []
    total = len(motif_pos_map)
    print(f"Total length: {total}.")
    print(total)
    for i, (pdb_id, motif_cid_map) in enumerate(motif_pos_map.items()):
        if not (i % 10):
            print(i)
        print(f"{total - i}: {pdb_id}")
        motif_pos_s = motif_cid_map['sno_markers']
        cids = motif_cid_map['cid']
        pdb_data = pdb_interface.get_info_for(pdb_id)
        if pdb_data is None:
            continue
        ATOM, HETATM, hb = pdb_data
        if not isinstance(motif_pos_s, list):
            motif_pos_s = [motif_pos_s]
            cids = [cids]
        for motif_pos, cid in zip(motif_pos_s, cids):
            try:
                dsr_snos = _get_sno_range(ATOM, cid, motif_pos)
                if dsr_snos is None:
                    continue
                res, C, CA, N = _from_considered_elements(ATOM, dsr_snos, cid)
                pept_bonds = _get_pept_bonds(CA, dsr_snos)
                # For filling descr df
                res_CA = _get_res_CA(res, CA, dsr_snos)

                angles, CA = dihedrals.get_descr_dihedrals(C, CA, N, dsr_snos)
                hbond_descr = hbonds.get_descr_hb(hb, ATOM, HETATM, dsr_snos)
                heavy_atom_contacts, hetatom_contacts, hetatom_covalent = \
                    contacts.get_contacts(ATOM, HETATM, cid, dsr_snos)

                descr = _assemble_descr(hetatom_contacts, hetatom_covalent,
                                        heavy_atom_contacts, angles,
                                        hbond_descr, res_CA, pept_bonds)
                full_descr = _add_columns(descr, pdb_id, motif_pos, cid)
            except Exception as e:
                print(e)
                print(f"Calc_descr failed for {pdb_id}:{cid}")
                pdb_suffix = pdb_id.lower().strip()
                if pdb_suffix + ".pkl" in paths.PDB_PARSED_SET:
                    # An earlier failing chain of the same pdb_id may have
                    # removed the file already; that must not abort the run.
                    try:
                        os.remove(os.path.join(paths.PDB_PARSED,
                                               pdb_suffix + ".pkl"))
                    except FileNotFoundError:
                        pass
                continue
            if not isinstance(full_descr, pd.DataFrame):
                # DataFrame.append also took a dict/Series as a single row;
                # keep that working with concat.
                full_descr = pd.DataFrame([full_descr])
            frames.append(full_descr)
    if not frames:
        # pd.concat raises on an empty list.
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)
def calculate(motif_pos_map):
    """Compute descriptors for the first motif marker of every PDB entry.

    Args:
        motif_pos_map: Mapping of ``pdb_id`` to a dict with the keys
            ``'sno_markers'`` (an indexable sequence; only element ``0`` is
            used) and ``'cid'`` (passed on unchanged as the chain id).

    Returns:
        A ``pandas.DataFrame`` with the descriptors of all processed entries
        and a fresh integer index, or an empty DataFrame when no entry
        produced descriptors.

    Raises:
        Exception: Any error raised during the descriptor calculation is
            re-raised unchanged after the failing ``pdb_id:cid`` has been
            printed.

    Entries whose PDB data is unavailable, or whose residue range cannot be
    determined, are skipped silently.
    """
    # Collect per-entry frames and concatenate once at the end.
    # DataFrame.append was removed in pandas 2.0 and was quadratic anyway.
    frames = []
    for pdb_id, motif_cid_map in motif_pos_map.items():
        motif_pos = motif_cid_map['sno_markers']
        cid = motif_cid_map['cid']
        pdb_data = pdb_interface.get_info_for(pdb_id)
        if pdb_data is None:
            continue
        ATOM, HETATM, hb = pdb_data
        try:
            motif_pos = motif_pos[0]
            dsr_snos = _get_sno_range(ATOM, cid, motif_pos)
            if dsr_snos is None:
                continue
            res, C, CA, N = _from_considered_elements(ATOM, dsr_snos, cid)
            pept_bonds = _get_pept_bonds(CA, dsr_snos)
            # For filling descr df
            res_CA = _get_res_CA(res, CA, dsr_snos)

            angles, CA = dihedrals.get_descr_dihedrals(C, CA, N, dsr_snos)
            hbond_descr = hbonds.get_descr_hb(hb, ATOM, HETATM, dsr_snos)
            heavy_atom_contacts, hetatom_contacts, hetatom_covalent = \
                contacts.get_contacts(ATOM, HETATM, cid, dsr_snos)

            descr = _assemble_descr(hetatom_contacts, hetatom_covalent,
                                    heavy_atom_contacts, angles, hbond_descr,
                                    res_CA, pept_bonds)
            full_descr = _add_columns(descr, pdb_id, motif_pos, cid)
        except Exception:
            # Name the failing entry, then let the error propagate.
            print(f"Calc_descr failed for {pdb_id}:{cid}")
            raise
        if not isinstance(full_descr, pd.DataFrame):
            # DataFrame.append also took a dict/Series as a single row;
            # keep that working with concat.
            full_descr = pd.DataFrame([full_descr])
        frames.append(full_descr)
    if not frames:
        # pd.concat raises on an empty list.
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)