Esempio n. 1
0
def calculate_single(pdb_id, cid, seq_marker):
    """Compute the descriptor for one (pdb_id, cid, seq_marker) position.

    Args:
        pdb_id: Identifier handed to ``pdb_interface.get_info_for``.
        cid: Chain identifier (presumably) forwarded to the helper functions.
        seq_marker: Start marker of the window; coerced with ``int()``.

    Returns:
        The value produced by ``_add_columns`` for this position.

    Raises:
        ValueError: If ``seq_marker`` cannot be converted to ``int``.
        Exception: If ``get_info_for`` returns ``None``, or if any step of
            the descriptor computation fails. In the latter case the
            message embeds the formatted traceback and the original error.
    """
    seq_marker = int(seq_marker)
    pdb_data = pdb_interface.get_info_for(pdb_id)
    if pdb_data is None:
        raise Exception(f"PDB file download fail for {pdb_id}.")
    ATOM, HETATM, hb = pdb_data
    try:
        dsr_snos = _get_sno_range(ATOM, cid, seq_marker)
        # The range must hold exactly 30 entries and begin at seq_marker.
        if dsr_snos is None or len(dsr_snos) != 30 or dsr_snos[0] != seq_marker:
            # Parentheses are required: without them the second f-string is
            # a separate no-op statement and the message gets truncated.
            msg = (f"ATOM lines not found in range({seq_marker}, "
                   f"{seq_marker + 30}) for {pdb_id}:{cid}.<br>")
            raise Exception(msg)
        res, C, CA, N = _from_considered_elements_single(ATOM, dsr_snos, cid)
        pept_bonds = _get_pept_bonds(CA, dsr_snos)
        # For filling descr df
        res_CA = _get_res_CA(res, CA, dsr_snos)
        # The second return value is not needed past this point.
        angles, _ = dihedrals.get_descr_dihedrals(C, CA, N, dsr_snos)

        hbond_descr = hbonds.get_descr_hb(hb, ATOM, HETATM, dsr_snos)

        heavy_atom_contacts, hetatom_contacts, hetatom_covalent = \
            contacts.get_contacts(ATOM, HETATM, cid, dsr_snos)

        descr = _assemble_descr(hetatom_contacts, hetatom_covalent,
                                heavy_atom_contacts, angles, hbond_descr,
                                res_CA, pept_bonds)

        full_descr = _add_columns(descr, pdb_id, seq_marker, cid)
    except Exception as e:
        msg = f"Exception caught in descriptor calculation. Traceback: " \
              f"<{traceback.format_exc()}>. Error: <{e}>"
        # Keep the original exception attached as the explicit cause.
        raise Exception(msg) from e
    return full_descr
def find(matrix_file, num_seqs, pdb_seq_file, output, motif_len=30):
    """Locate motif positions in structure-derived sequences and pickle them.

    Steps, as performed below:
      1. Run ``conv_interface`` and search ``pdb_seq_file`` to obtain the
         candidate (pdb_id, cid) pairs.
      2. Resolve a sequence for every pair, using the pickle cache at
         ``paths.RCSB_SEQS`` when possible and the parsed PDB data otherwise.
         The cache is rewritten afterwards.
      3. Write the sequences in FASTA-like form to
         ``paths.DEBUG/pdb_structure_seqs.txt``, screen and filter that file,
         and search it again.
      4. Pickle the resulting motif positions to ``output``.

    Args:
        matrix_file: Forwarded to ``conv_interface.run``.
        num_seqs: Forwarded to ``conv_interface.run``.
        pdb_seq_file: Sequence file used for the first search.
        output: Path of the pickle file receiving the final search result.
        motif_len: Forwarded to ``conv_interface.run`` and used as the
            threshold passed to ``filter_seqs.delete_short_seqs``.
    """
    conv_output = paths.CONV_OUTPUT
    conv_interface.run(matrix_file, motif_len, num_seqs, conv_output)

    pdb_cid_motif_raw = search_converters.search_run(conv_output, pdb_seq_file)
    pdb_cids = [(pdb_id, cid)
                for pdb_id, values in pdb_cid_motif_raw.items()
                for cid in values['cid']]
    print(len(pdb_cids))

    # NOTE(security): pickle.load executes arbitrary code from the file; this
    # is only acceptable while paths.RCSB_SEQS is a locally produced cache.
    if os.path.isfile(paths.RCSB_SEQS):
        with open(paths.RCSB_SEQS, 'rb') as file:
            rcsb_seqs = pickle.load(file)
    else:
        rcsb_seqs = {}

    pdb_cid_seq = {}
    for i, (pdb_id, cid) in enumerate(pdb_cids):
        if not i % 10:
            print(i)
        # Cache keys are upper-cased; result keys keep the original casing.
        cache_key = (pdb_id.upper(), cid.upper())
        if cache_key in rcsb_seqs:
            pdb_cid_seq[(pdb_id, cid)] = rcsb_seqs[cache_key]
            continue
        try:
            pdb_data = pdb_interface.get_info_for(pdb_id)
            if pdb_data is None:
                # Handle the missing-data case explicitly instead of relying
                # on the TypeError raised by subscripting None.
                print(f"No PDB data available for {pdb_id}. Skipping.")
                continue
            ATOM = pdb_data[0]
            ATOM_cid = ATOM[ATOM.cid == cid]
            # NOTE(review): if ATOM is a pandas DataFrame this check can
            # never be true (an empty frame is returned instead) - confirm
            # whether an emptiness test was intended.
            if ATOM_cid is None:
                continue
            seq = pdb_interface._extract_seq_from_df(ATOM_cid)
            if seq is None:
                continue
        except Exception as e:
            print(f"get_seq_for() fails for pdb_id/cid {pdb_id}/{cid}. "
                  f"Skipping.")
            print(f"Traceback: <{traceback.format_exc()}>")
            print(f"Error_msg: <{e}>")
            continue
        pdb_cid_seq[(pdb_id, cid)] = seq
        rcsb_seqs[cache_key] = seq

    # Persist the (possibly extended) cache for later runs.
    with open(paths.RCSB_SEQS, 'wb') as file:
        pickle.dump(rcsb_seqs, file, pickle.HIGHEST_PROTOCOL)

    pdb_cid_seq = OrderedDict(sorted(pdb_cid_seq.items()))
    pdb_structure_seqs = os.path.join(paths.DEBUG, "pdb_structure_seqs.txt")
    with open(pdb_structure_seqs, 'w') as file:
        for (pdb_id, cid), seq in pdb_cid_seq.items():
            file.write(f">{pdb_id}_{cid}\n")
            file.write(seq + "\n")
    # The same path is passed as both arguments of screen().
    clean_fasta_alphabet.screen(pdb_structure_seqs, pdb_structure_seqs)
    filter_seqs.delete_short_seqs(pdb_structure_seqs, motif_len)

    motif_positions = search_converters.search_run(conv_output,
                                                   pdb_structure_seqs)
    with open(output, 'wb') as file:
        pickle.dump(motif_positions, file, pickle.HIGHEST_PROTOCOL)
Esempio n. 3
0
def calculate(motif_pos_map):
    """Compute descriptors for every motif position in ``motif_pos_map``.

    Args:
        motif_pos_map: Mapping of pdb_id to a dict with the keys
            ``'sno_markers'`` and ``'cid'``. Each may hold a list (paired
            element-wise) or a single value, which is wrapped in a list.

    Returns:
        A ``pandas.DataFrame`` with all successfully computed descriptors
        stacked under a fresh integer index; an empty frame if none
        succeeded. Entries whose PDB data is unavailable, whose range cannot
        be determined, or whose computation raises are skipped.
    """
    # Collect per-motif results and concatenate once at the end:
    # DataFrame.append was removed in pandas 2.0 and copied on every call.
    # Assumes _add_columns returns a DataFrame - TODO confirm.
    frames = []
    print(f"Total length: {len(motif_pos_map)}.")
    print(len(motif_pos_map))
    for i, (pdb_id, motif_cid_map) in enumerate(motif_pos_map.items()):
        if not (i % 10):
            print(i)
        print(f"{len(motif_pos_map) - i}: {pdb_id}")
        motif_pos_s = motif_cid_map['sno_markers']
        cids = motif_cid_map['cid']

        pdb_data = pdb_interface.get_info_for(pdb_id)
        if pdb_data is None:
            continue
        ATOM, HETATM, hb = pdb_data
        if not isinstance(motif_pos_s, list):
            motif_pos_s = [motif_pos_s]
            cids = [cids]

        for motif_pos, cid in zip(motif_pos_s, cids):
            try:
                dsr_snos = _get_sno_range(ATOM, cid, motif_pos)
                if dsr_snos is None:
                    continue
                res, C, CA, N = _from_considered_elements(ATOM, dsr_snos, cid)
                pept_bonds = _get_pept_bonds(CA, dsr_snos)
                # For filling descr df
                res_CA = _get_res_CA(res, CA, dsr_snos)
                angles, _ = dihedrals.get_descr_dihedrals(C, CA, N, dsr_snos)

                hbond_descr = hbonds.get_descr_hb(hb, ATOM, HETATM, dsr_snos)

                heavy_atom_contacts, hetatom_contacts, hetatom_covalent = \
                    contacts.get_contacts(ATOM, HETATM, cid, dsr_snos)

                descr = _assemble_descr(hetatom_contacts, hetatom_covalent,
                                        heavy_atom_contacts, angles, hbond_descr,
                                        res_CA, pept_bonds)

                frames.append(_add_columns(descr, pdb_id, motif_pos, cid))
            except Exception as e:
                print(e)
                print(f"Calc_descr failed for {pdb_id}:{cid}")
                # Drop the cached parse of this PDB entry after a failure.
                pdb_suffix = pdb_id.lower().strip()
                if pdb_suffix + ".pkl" in paths.PDB_PARSED_SET:
                    try:
                        os.remove(os.path.join(paths.PDB_PARSED,
                                               pdb_suffix + ".pkl"))
                    except FileNotFoundError:
                        # Already removed, e.g. by an earlier failing chain
                        # of the same entry; must not abort the whole run.
                        pass
                continue
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)
Esempio n. 4
0
def calculate(motif_pos_map):
    """Compute descriptors for the first motif position of every PDB entry.

    Args:
        motif_pos_map: Mapping of pdb_id to a dict with the keys
            ``'sno_markers'`` (indexable; only element 0 is used) and
            ``'cid'`` (passed to the helpers unchanged).

    Returns:
        A ``pandas.DataFrame`` with all computed descriptors stacked under a
        fresh integer index; an empty frame if nothing was computed. Entries
        whose PDB data is unavailable or whose range cannot be determined
        are skipped.

    Raises:
        Exception: Any error raised during a descriptor computation is
            re-raised after the failing ``pdb_id:cid`` has been printed.
    """
    # Collect results and concatenate once at the end: DataFrame.append was
    # removed in pandas 2.0 and copied the whole frame on every call.
    # Assumes _add_columns returns a DataFrame - TODO confirm.
    frames = []
    for pdb_id, motif_cid_map in motif_pos_map.items():
        motif_pos = motif_cid_map['sno_markers']
        cid = motif_cid_map['cid']

        pdb_data = pdb_interface.get_info_for(pdb_id)
        if pdb_data is None:
            continue
        ATOM, HETATM, hb = pdb_data
        try:
            motif_pos = motif_pos[0]
            dsr_snos = _get_sno_range(ATOM, cid, motif_pos)
            if dsr_snos is None:
                continue
            res, C, CA, N = _from_considered_elements(ATOM, dsr_snos, cid)
            pept_bonds = _get_pept_bonds(CA, dsr_snos)
            # For filling descr df
            res_CA = _get_res_CA(res, CA, dsr_snos)
            angles, _ = dihedrals.get_descr_dihedrals(C, CA, N, dsr_snos)

            hbond_descr = hbonds.get_descr_hb(hb, ATOM, HETATM, dsr_snos)

            heavy_atom_contacts, hetatom_contacts, hetatom_covalent = \
                contacts.get_contacts(ATOM, HETATM, cid, dsr_snos)

            descr = _assemble_descr(hetatom_contacts, hetatom_covalent,
                                    heavy_atom_contacts, angles, hbond_descr,
                                    res_CA, pept_bonds)

            frames.append(_add_columns(descr, pdb_id, motif_pos, cid))
        except Exception:
            # Report which entry failed, then let the error propagate.
            print(f"Calc_descr failed for {pdb_id}:{cid}")
            raise
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)