Beispiel #1
0
def get_positions_to_skip_from_alignment( alignment, fraglen ):
    '''
    Return the residue numbers at which a fragment of length fraglen should
    not be started, because it would run across a chain break or past the
    C-terminus.

    eg. if fraglen equals to 3
        sequence       FDASFDSA-----FDSAFDSAFDSA
        pos_to_acquire ^^^^^^       ^^^^^^^^^^

    alignment is handed unchanged to alignment_util.seq_mapping(); the
    result is used as a dict whose keys are residue numbers and whose values
    are the matching positions in the reference sequence (presumably the
    full-length sequence -- confirm against alignment_util).

    A residue counts as a chain break when the reference position of the
    next residue is more than one ahead of its own.  The last residue is
    always treated as a break (the terminus).  For every break residue r,
    the numbers r-fraglen+2 .. r are reported.  These may include numbers
    that are not keys of the mapping (e.g. values below the first residue).

    Returns a list of unique integers in no particular order; an empty list
    when the mapping is empty.
    '''
    seq_map_dict = alignment_util.seq_mapping( alignment )
    res_nums = sorted( seq_map_dict.keys() )

    # Compare each residue with its successor.  Pairing the list with itself
    # shifted by one avoids indexing past the end, which previously left
    # next_rsn unbound (single residue) or stale (last residue).
    chainbreak_resnums = []
    for rsn, next_rsn in zip( res_nums, res_nums[1:] ):
        if seq_map_dict[next_rsn] - seq_map_dict[rsn] > 1:
            chainbreak_resnums.append( rsn )

    # the last residue has no successor: it is the terminus
    if res_nums:
        chainbreak_resnums.append( res_nums[-1] )

    # a fragment starting at any of the last fraglen-1 positions before a
    # break (break residue included) would extend over the break
    rsd_to_skip = set()
    for rsd in chainbreak_resnums:
        rsd_to_skip.update( range( rsd-fraglen+2, rsd+1 ) )

    return list( rsd_to_skip )
Beispiel #2
0
def align_and_renumber_pdb(fulllength_fasta, truncated_pdbfile, ignore_check=False):
    """Return the lines of a single-chain PDB renumbered after a full-length sequence.

    This function is going to make the renumber_pdb() obsolete.

    The sequence read from ``fulllength_fasta`` is aligned against the sequence
    extracted from ``truncated_pdbfile``. Each residue found in the
    intermediate file ``temp.pdb`` then gets the number that
    ``alignment_util.seq_mapping`` assigns to it.

    Args:
        fulllength_fasta: path given to ``seq_util.fasta_file_reader``.
        truncated_pdbfile: path given to ``seq_util.pdb2fasta`` and to the
            re-indexing step.
        ignore_check: when true, skip ``correct_alignment_using_pdb`` and only
            write ``temp.pdb`` through ``pdb_idx1``. When false, the corrected
            alignment is used and ``temp.pdb`` is expected to be left behind by
            ``correct_alignment_using_pdb`` (NOTE(review): confirm in
            alignment_util).

    Returns:
        A string: two ``REMARK`` lines holding both rows of the alignment, the
        renumbered coordinate lines, and a closing ``TER`` line.

    Raises:
        AssertionError: if ``temp.pdb`` holds anything other than exactly one chain.

    Side effects:
        Uses ``temp.pdb`` in the current working directory and removes it
        before returning, also when an error occurs.
    """
    # make alignment
    fl_seq = seq_util.fasta_file_reader(fulllength_fasta)
    tc_seq = seq_util.pdb2fasta(truncated_pdbfile)
    alignment = alignment_util.align_two_seqs(fl_seq, tc_seq)

    if ignore_check:
        # for the following step, this has been used in alignment_util.correct_alignment_using_pdb
        pdb_idx1(truncated_pdbfile, "temp.pdb")
    else:
        alignment = alignment_util.correct_alignment_using_pdb(alignment, truncated_pdbfile, False)

    try:
        seq_map = alignment_util.seq_mapping(alignment)

        xyz_dict, pdbline_dict, resname_dict = create_xyzDict_bychain("temp.pdb")
        assert len(pdbline_dict.keys()) == 1, (
            "this script does not deal with pdbs containing multiple chains (%s)" % pdbline_dict.keys()
        )
        # dict views cannot be indexed on Python 3; take the only key by iteration
        chain = next(iter(pdbline_dict))

        xyz_dict = xyz_dict[chain]
        pdbline_dict = pdbline_dict[chain]
        resname_dict = resname_dict[chain]
        res_nums = sorted(pdbline_dict.keys())

        out_pdblines = [
            "REMARK full_length_aln %s\n" % alignment[0],
            "REMARK truncated_aln   %s\n" % alignment[1],
        ]

        for rsn in res_nums:
            newrsn = seq_map[rsn]
            for line in pdbline_dict[rsn].split("\n")[:-1]:  # [:-1], because the last item in the list is ''
                # characters 22-25 (0-based) are overwritten with the new number, right-aligned in 4 columns
                out_pdblines.append(line[0:22] + "%4s" % newrsn + line[26:] + "\n")
        out_pdblines.append("TER\n")

        return "".join(out_pdblines)
    finally:
        # always clean up the scratch file, without masking an earlier error
        # when the file was never created
        if os.path.exists("temp.pdb"):
            os.remove("temp.pdb")
        fl_seq = seq_util.fasta_file_reader(opts.fragfile_fasta)
        if opts.truncated_pdb:
            tc_seq = seq_util.pdb2fasta(opts.truncated_pdb)
            alignment = alignment_util.correct_alignment_using_pdb(
                alignment_util.align_two_seqs(fl_seq, tc_seq), opts.truncated_pdb
            )
        elif opts.truncated_fasta:
            tc_seq = seq_util.fasta_file_reader(opts.truncated_fasta)
            alignment = alignment_util.align_two_seqs(fl_seq, tc_seq)
        else:
            sys.stderr.write("ERROR: you need to either give --truncated_pdb or --truncated_fasta\n")
            exit()

        chainbreak_resnums = frag_util.get_positions_to_skip_from_alignment(alignment, frag_len)

        seq_map = alignment_util.seq_mapping(alignment)
        residues = sorted(seq_map.keys())

        # residues before chain break shouldn't take

        # get residue positions to keep
        out_fraglines = ""
        if opts.debug:
            out_fraglines += "# %s\n" % (alignment[0])
            out_fraglines += "# %s\n" % (alignment[1])

        for pos in residues:
            # skip positions at chainbreaks and termini
            if pos in chainbreak_resnums:
                sys.stderr.write("Skipping: %s\n" % pos)
                continue