Example #1
0
def pdbidx1( pdbname ):
    """ Write the output of pdb_util.pdb_idx1( pdbname ) to a new file.

    The output filename comes from pdb_util.get_uniq_outpdbname( pdbname, "_idx1" ).
    Nothing is returned.

    NOTE(review): judging by the name, pdb_idx1 renumbers residues starting
    from 1 and returns the resulting pdb text -- confirm against pdb_util.
    """
    # The handle is never read; the file is opened only so that an unreadable
    # input fails here, before any output name is reserved or written.
    with open( pdbname, "r" ):
        outlines = pdb_util.pdb_idx1( pdbname )

    outfn = pdb_util.get_uniq_outpdbname( pdbname, "_idx1")

    # context manager guarantees the output handle is closed even if write() raises
    with open( outfn, "w" ) as buf:
        buf.write( outlines )
Example #2
0
def correct_alignment_using_pdb( alignment, sbj_pdbfile, remove_temppdb=True ):
    """ This function is trying to fix an alignment issue caused by the penalty of opening a gap in dynamic programming
    eg:
            123456789
            NKTTTTTKG <- ref_seq_aligned
            NK------G <- mistaken sbj_seq_aligned (doesn't like a gap)
            N------KG <- correct sbj_seq_aligned (from the pdb, KG are connected)
            1      23
            seq_map = { 1:1,
                        2:2,  <- should be 2:8
                        3:9 }

    Args:
        alignment: indexable pair; alignment[0] is the aligned reference
            sequence and alignment[1] the aligned subject sequence.
        sbj_pdbfile: path of the subject pdb; it is renumbered into
            "temp.pdb" in the current working directory before being parsed.
        remove_temppdb: when True (default) "temp.pdb" is deleted once it
            has been parsed, even if parsing fails.

    Returns:
        ( ref_seq_aligned, corrected_sbj_seq_aligned ). The corrected subject
        string has the reference residue letter at every reference position
        that is mapped after correction, and "-" everywhere else.

    Raises:
        AssertionError: if the pdb holds more than one chain, or if a gap that
            is not physically present cannot be explained by a repeated
            residue name in the reference.
        KeyError: if a pdb residue number is missing from the sequence map.
    """
    ref_seq_aligned, sbj_seq_aligned = ( alignment[0], alignment[1] )

    # to have a standard, number residues from 1 to the end, continuously
    pdb_util.pdb_idx1( sbj_pdbfile, "temp.pdb" )
    try:
        xyz_dict, pdbline_dict, _ = pdb_util.create_xyzDict_bychain( "temp.pdb" )
    finally:
        # clean up the scratch file even when parsing blows up
        if remove_temppdb:
            os.remove("temp.pdb")

    # list() keeps this working when keys() returns a view (Python 3)
    chains = list( pdbline_dict.keys() )
    if len( chains ) != 1:
        # raised explicitly so the check survives "python -O"
        raise AssertionError( "this script does not deal with pdbs containing multiple chains (%s)" % ( chains, ) )
    chain = chains[0]

    xyz_dict = xyz_dict[ chain ]
    sbj_pdb_idx1_res_nums = sorted( pdbline_dict[chain].keys() )

    seq_map = seq_mapping( alignment )
    # work on a copy so the map being read in the loop is never the one being edited
    # (assumes seq_mapping returns a plain dict, as the example above suggests)
    corrected_seq_map = dict( seq_map )

    last_idx = len( sbj_pdb_idx1_res_nums ) - 1
    for idx, rsn in enumerate( sbj_pdb_idx1_res_nums ):
        if idx < last_idx:
            next_rsn = sbj_pdb_idx1_res_nums[idx+1]
            # residues were reindexed from 1, so numbering is expected to be continuous;
            # this used to be an assert that was silently swallowed -- keep it non-fatal but visible
            if next_rsn != rsn+1:
                sys.stderr.write("WARNING: residue numbering is not continuous at %s-%s\n" %( rsn, next_rsn ))
        else:
            # last residue has no successor: compare it with itself so no gap is reported
            next_rsn = rsn

        # newrsn means the rsn you would like to number from the reference (ref_seq_aligned)
        newrsn = seq_map[rsn]

        try:
            next_newrsn = seq_map[next_rsn]
        except KeyError:
            sys.stderr.write("ERROR: couldn't find the next rsn in the old\n")
            raise

        # detect chain break from alignment
        if ( next_newrsn - newrsn ) <= 1:
            continue # no gap here

        dist = pdb_util.cal_dist( xyz_dict[rsn]["CA"], xyz_dict[next_rsn]["CA"] ) # dist from old numbering
        sys.stderr.write("Chainbreak (from alignment) at %s(%s)-%s(%s) with dist %.3f\n" %(rsn, newrsn, next_rsn, next_newrsn, dist))

        # if no physical chainbreak is detected, this is possibly an error in dynamic programming;
        # try fixing it by looking for the same residue name at next_newrsn-1 and, if found,
        # move the current residue to next_newrsn-1
        if ( dist <= 4.5 ):
            sys.stderr.write("WARNING: no gap physically detected from %s-%s; caught an error in dynamic programming...\n" %(newrsn, next_newrsn))
            # index in seq, thus -1
            # just to make sure this error is caused by the dynamic programming (must be same residue name)
            if ref_seq_aligned[newrsn-1] != ref_seq_aligned[next_newrsn-2]:
                raise AssertionError( "ERROR: failing to looking for same residue name as pos:%s in pos:%s of fasta file could not fix the error in dynamic programming\n%s\n%s\n%s\nIs this a partial thread?" %( newrsn, next_newrsn-1, alignment[0], alignment[1], sbj_pdbfile ) )
            sys.stderr.write("Correcting an alignment problem caused by DP: %s(%s) -> %s(%s)\n"%( ref_seq_aligned[newrsn-1], newrsn, ref_seq_aligned[next_newrsn-2], next_newrsn-1 ) )

            corrected_seq_map[rsn] = next_newrsn-1

    # make new sbj_seq_aligned
    #
    # make dict[ ref_numbering ] = old_numbering; was dict[ old_numbering ] = ref_numbering
    #     seq_map = { 1:1,
    #                 2:8,
    #                 3:9 }
    #     thus the keys will be residues that are aligned (in reference numbering)
    #     revert_seq_map = { 1:1,
    #                        8:2,
    #                        9:3 }
    new_dict = python_util.invert_dict( corrected_seq_map )
    # set gives O(1) membership tests instead of scanning a key list per position
    res_numbers_aligned = set( new_dict.keys() )

    # NOTE(review): aligned positions are filled with the *reference* residue letter,
    # exactly as before -- confirm that is intended when ref and subject differ.
    corrected_sbj_seq_aligned = "".join(
        rsd if ( idx+1 ) in res_numbers_aligned else "-"
        for idx, rsd in enumerate( ref_seq_aligned ) )

    return (ref_seq_aligned, corrected_sbj_seq_aligned)