Example #1
0
def score(PDBfile):
    """
    Calculates the m-score for a given PDB file

    arguments:

    PDBfile - the PDB file to score

    hidden arguments:

    aas.scr, pro.scr, gly.scr - the scoring tables
    need to be present in working directory
    (presumably read by load_scores(), which is defined elsewhere)

    returns:

    (pro score + gly score + other-residue score) / length(chain) * 1000,
    or 0 when the residue lists are inconsistent or the calculated
    protein length is 0
    """
    from pro_angle import find_residue
    from Bio.PDB.PDBParser import PDBParser
    from pro_length import length

    (aas, gly, pro) = load_scores()  # scoring tables, unpacked in this order

    # Drop a trailing '.pdb' to obtain the structure id.  str.rstrip()
    # removes a *set of characters*, not a suffix, so it also ate trailing
    # 'p', 'd', 'b' and '.' of the name itself ('2ptd.pdb' -> '2pt').
    if PDBfile.endswith('.pdb'):
        struct_id = PDBfile[:-len('.pdb')]
    else:
        struct_id = PDBfile

    pars = PDBParser(PERMISSIVE=1)
    struct = pars.get_structure(struct_id, PDBfile)
    # only the first chain of the first model is scored
    chain = struct.child_list[0].child_list[0]
    last = len(chain) - 1

    pro_list = find_residue(chain, 'PRO')
    gly_list = find_residue(chain, 'GLY')
    # residue numbers from the second residue up to (excluding) the last one;
    # list() keeps .remove() available where range() is not a list
    aas_list = list(range(chain.child_list[1].id[1],
                          chain.child_list[last].id[1]))

    # need to remove pro/gly indices in first/last position
    # NOTE(review): 1 and len(chain)-1 are compared against the values
    # returned by find_residue() - presumably residue numbers; confirm.
    for residue_list in (pro_list, gly_list):
        for edge in (1, last):
            if edge in residue_list:
                residue_list.remove(edge)

    try:
        for index in pro_list:
            aas_list.remove(index)  # remove pros from aas_list
        for index in gly_list:
            aas_list.remove(index)  # remove glys from aas_list
    except ValueError:
        # a PRO/GLY index that is not part of the scored residue range
        print('incosistency in PDB file - will return score = 0')
        return 0

    total = (score_help(chain, pro_list, pro)
             + score_help(chain, gly_list, gly)
             + score_help(chain, aas_list, aas))
    size = length(chain)
    try:
        # float() keeps the normalisation from truncating under Python 2
        # integer division should score_help() ever return integers
        return (float(total) / size) * 1000  # normalize score
    except ZeroDivisionError:
        print("calculated protein length 0 -> returning score 0")
        return 0
Example #2
0
def score(PDBfile):
    """
    Calculates the m-score for a given PDB file

    arguments:

    PDBfile - the PDB file to score

    hidden arguments:

    aas.scr, pro.scr, gly.scr - the scoring tables
    need to be present in working directory
    (presumably read by load_scores(), which is defined elsewhere)

    returns:

    (pro score + gly score + other-residue score) / length(chain) * 1000,
    or 0 when the residue lists are inconsistent or the calculated
    protein length is 0
    """
    from pro_angle import find_residue
    from Bio.PDB.PDBParser import PDBParser
    from pro_length import length

    (aas, gly, pro) = load_scores()  # scoring tables, unpacked in this order

    # Structure id = file name without a trailing '.pdb'.  The previous
    # PDBfile.rstrip('.pdb') strips any trailing '.', 'p', 'd' or 'b'
    # characters, which corrupts ids such as '1abd.pdb' (-> '1a').
    struct_id = PDBfile
    if struct_id.endswith('.pdb'):
        struct_id = struct_id[:-len('.pdb')]

    pars = PDBParser(PERMISSIVE=1)
    struct = pars.get_structure(struct_id, PDBfile)
    model = struct.child_list[0]
    chain = model.child_list[0]  # first chain of the first model only
    last = len(chain) - 1

    pro_list = find_residue(chain, 'PRO')
    gly_list = find_residue(chain, 'GLY')
    # residue numbers from the second residue up to (excluding) the last one;
    # materialised as a list because entries are removed below
    aas_list = list(range(chain.child_list[1].id[1],
                          chain.child_list[last].id[1]))

    # need to remove pro/gly indices in first/last position
    # NOTE(review): assumes find_residue() returns residue numbers that are
    # comparable with 1 and len(chain)-1 - confirm against pro_angle.
    for residue_list in (pro_list, gly_list):
        for edge in (1, last):
            if edge in residue_list:
                residue_list.remove(edge)

    try:
        for index in pro_list:
            aas_list.remove(index)  # remove pros from aas_list
        for index in gly_list:
            aas_list.remove(index)  # remove glys from aas_list
    except ValueError:
        # a PRO/GLY index that is not part of the scored residue range
        print('incosistency in PDB file - will return score = 0')
        return 0

    proscore = score_help(chain, pro_list, pro)
    glyscore = score_help(chain, gly_list, gly)
    aasscore = score_help(chain, aas_list, aas)
    total = proscore + glyscore + aasscore
    size = length(chain)
    try:
        # force float division so integer scores are not truncated on
        # Python 2 before being scaled
        return (float(total) / size) * 1000  # normalize score
    except ZeroDivisionError:
        print("calculated protein length 0 -> returning score 0")
        return 0
Example #3
0
def score(PDBfile, table_dir='/home/marciovm/proteins/bdtrimers/'):
    """
    Calculates the m-score for a given PDB file

    Every residue trimer (previous, current, next residue name) of the
    first chain of the first model is looked up in its own scoring table
    and the per-residue scores are summed.

    arguments:

    PDBfile - the PDB file to score

    table_dir - directory holding the trimer scoring tables, laid out as
    <table_dir>/<cur in lower case>/<PRE>_<CUR>_<POS>.scr

    returns:

    summed score / length(chain) * 1000, or 0 when the calculated
    protein length is 0
    """
    from Bio.PDB.PDBParser import PDBParser
    from pro_length import length
    import os

    # Drop a trailing '.pdb' to obtain the structure id.  str.rstrip()
    # removes a *set of characters*, not a suffix, so it also ate trailing
    # 'p', 'd', 'b' and '.' of the name itself ('2ptd.pdb' -> '2pt').
    if PDBfile.endswith('.pdb'):
        struct_id = PDBfile[:-len('.pdb')]
    else:
        struct_id = PDBfile

    pars = PDBParser(PERMISSIVE=1)
    struct = pars.get_structure(struct_id, PDBfile)
    chain = struct.child_list[0].child_list[0]
    residues = chain.child_list

    total = float(0)
    size = length(chain)

    for res_index in range(1, size - 2):  # not first or last res
        pre = residues[res_index - 1].resname
        cur = residues[res_index].resname
        pos = residues[res_index + 1].resname

        filename = pre + '_' + cur + '_' + pos + '.scr'
        table_file = os.path.join(table_dir, cur.lower(), filename)

        chain_index = residues[res_index].id[1]  # residue number in the PDB

        table = load_scores(table_file)
        # load_scores() apparently returns 0 when no table is available for
        # this trimer; such residues contribute nothing to the score
        if table != 0:
            total = total + score_help(chain, chain_index, table)

    try:
        return (total / size) * 1000  # normalize score
    except ZeroDivisionError:
        print("calculated protein length 0 -> returning score 0")
        return 0
Example #4
0
def score(PDBfile,
          start_index=1,
          end_index=9999,
          smoothed='y',
          mutated_list=' ',
          table_dir='/home/marciovm/proteins/bdtrimers/'):
    """
    Calculates the m-score for a given PDB file

    Every residue trimer (previous, current, next residue name) in the
    selected range of the first chain of the first model is looked up in
    its own scoring table and the per-residue scores are summed.

    arguments:

    PDBfile - the PDB file to score

    start_index - child_list start index of residues to look at
    (must be >= 1, the residue before it is part of the first trimer)

    end_index - child_list end index (exclusive); clamped to
    length(chain) - 2

    smoothed - 'y' to use the '.smooth.scr' tables, anything else to use
    the plain '.scr' tables

    mutated_list - ' ' (default) to score the residue names found in the
    PDB file; otherwise a whitespace separated list of residue names that
    replaces the names in the range start_index..end_index

    table_dir - directory holding the trimer scoring tables, laid out as
    <table_dir>/<cur in lower case>/<PRE>_<CUR>_<POS><suffix>

    returns:

    summed score / length(chain) * 1000, or 0 when the calculated
    protein length is 0

    raises:

    IndexError - if mutated_list names fewer residues than the scored range
    """
    from Bio.PDB.PDBParser import PDBParser
    from pro_length import length
    import os

    # Drop a trailing '.pdb' to obtain the structure id.  str.rstrip()
    # removes a *set of characters*, not a suffix, so it also ate trailing
    # 'p', 'd', 'b' and '.' of the name itself ('2ptd.pdb' -> '2pt').
    if PDBfile.endswith('.pdb'):
        struct_id = PDBfile[:-len('.pdb')]
    else:
        struct_id = PDBfile

    pars = PDBParser(PERMISSIVE=1)
    struct = pars.get_structure(struct_id, PDBfile)
    chain = struct.child_list[0].child_list[0]
    residues = chain.child_list

    total = float(0)
    size = length(chain)

    if end_index > (size - 2):
        end_index = size - 2

    if smoothed == 'y':
        suffix = '.smooth.scr'
    else:
        suffix = '.scr'

    # None -> score the names found in the structure itself
    if mutated_list == ' ':
        mutated_form = None
    else:
        mutated_form = mutated_list.split()
    last_offset = end_index - start_index - 1

    for res_index in range(start_index, end_index):
        if mutated_form is None:
            # non-mutated score: all three names come from the chain
            pre = residues[res_index - 1].resname
            cur = residues[res_index].resname
            pos = residues[res_index + 1].resname
        else:
            # mutated score: names come from mutated_list, except for the
            # neighbours just outside the range, which come from the chain
            offset = res_index - start_index
            cur = mutated_form[offset]
            if offset == 0:
                pre = residues[res_index - 1].resname
            else:
                pre = mutated_form[offset - 1]
            if offset == last_offset:
                pos = residues[res_index + 1].resname
            else:
                pos = mutated_form[offset + 1]

        filename = pre + '_' + cur + '_' + pos + suffix
        table_file = os.path.join(table_dir, cur.lower(), filename)
        chain_index = residues[res_index].id[1]  # residue number in the PDB

        table = load_scores(table_file)
        # load_scores() apparently returns 0 when no table is available for
        # this trimer; such residues contribute nothing to the score
        if table != 0:
            total = total + score_help(chain, chain_index, table)

    try:
        return (total / size) * 1000  # normalize score
    except ZeroDivisionError:
        print("calculated protein length 0 -> returning score 0")
        return 0
def compile_trimers():
    """
    Collects per-trimer data from a batch of PDB chain files

    For the '*.ent' files with list positions sys.argv[1] (inclusive) to
    sys.argv[2] (exclusive) in /home/marciovm/proteins/bigdist, every
    residue trimer (previous, current, next residue name) of the chain
    named by the 5th character of the file name is visited and the values
    returned by calc_all_dihedrals() (defined elsewhere; presumably
    dihedral angles) are appended as one line to

    /home/marciovm/proteins/bdtrimers/<cur in lower case>/<PRE>_<CUR>_<POS>.dat

    side effects:

    changes the working directory, appends to (and creates) .dat files,
    prints progress to stdout
    """
    import os
    import commands
    import sys
    from Bio.PDB.PDBParser import PDBParser
    from pro_length import length

    os.chdir('/home/marciovm/proteins/bigdist')  ##choose directory

    # list of pdb files w/ chain names
    pdb_files = commands.getoutput('ls *.ent').split()

    start = int(sys.argv[1])
    end = int(sys.argv[2])

    aas = ['GLY', 'ALA', 'VAL', 'LEU', 'ILE', 'SER', 'THR', 'ASN', 'GLN',
           'PHE', 'TYR', 'TRP', 'CYS', 'MET', 'PRO', 'ASP', 'GLU', 'LYS',
           'ARG', 'HIS']

    for pdb_index in range(start, end):
        pdb = pdb_files[pdb_index]

        pars = PDBParser(PERMISSIVE=1)
        struct = pars.get_structure(pdb, pdb)
        model = struct.child_list[0]
        # 5th character of the file name is the chain id; '.' means the
        # name has no chain id and the blank chain is used
        if pdb[4] == '.':
            chain_name = ' '
        else:
            chain_name = pdb[4].upper()
        chain = model.child_dict[chain_name]

        print('processing ' + pdb + ' index: ' + str(pdb_index))

        residues = chain.child_list

        ##find first real residue
        # Bounded by the list length: indexing past the end of child_list
        # raises IndexError (not the KeyError that used to be caught), so a
        # chain without any standard residue crashed the whole run.
        index = 0
        while index < len(residues) and residues[index].resname not in aas:
            index += 1
        if index == len(residues):
            print('no standard residue found in ' + pdb + ' - skipping')
            continue
        index += 1  ##should be 2nd real residue

        ##find last real residue
        final_res_index = length(chain) + index - 3  ##2nd to last real residue

        ##look at everything in between
        for aa_index in range(index, final_res_index):
            aacur = residues[aa_index].resname
            aapre = residues[aa_index - 1].resname
            aapost = residues[aa_index + 1].resname
            seq = aapre + '_' + aacur + "_" + aapost + '.dat'
            filename = ('/home/marciovm/proteins/bdtrimers/'
                        + aacur.lower() + '/' + seq)
            try:
                file_out = open(filename, 'a')
            except IOError:
                # presumably no output directory exists for this residue name
                print("IOError line 63, nostandard aa name in PDB")
                continue

            # try/finally so the handle is released even when the
            # calculation or the write fails
            try:
                dihedrals = calc_all_dihedrals(chain, aa_index)
                if dihedrals:
                    if len(dihedrals) > 2:
                        values = (dihedrals[0], dihedrals[1],
                                  dihedrals[2], dihedrals[3])
                    else:
                        values = (dihedrals[0], dihedrals[1])
                    # zero-padded to 6 characters, two spaces between columns
                    line = '  '.join([str(value).zfill(6)
                                      for value in values])
                    file_out.write(line + '\n')
            finally:
                file_out.close()
Example #6
0
def score(PDBfile, start_index=1, end_index=9999, smoothed='y',
          mutated_list=' ',
          table_dir='/home/marciovm/proteins/bdtrimers/'):
    """
    Calculates the m-score for a given PDB file

    Every residue trimer (previous, current, next residue name) in the
    selected range of the first chain of the first model is looked up in
    its own scoring table and the per-residue scores are summed.

    arguments:

    PDBfile - the PDB file to score

    start_index - child_list start index of residues to look at
    (must be >= 1, the residue before it is part of the first trimer)

    end_index - child_list end index (exclusive); clamped to
    length(chain) - 2

    smoothed - 'y' to use the '.smooth.scr' tables, anything else to use
    the plain '.scr' tables

    mutated_list - ' ' (default) to score the residue names found in the
    PDB file; otherwise a whitespace separated list of residue names that
    replaces the names in the range start_index..end_index

    table_dir - directory holding the trimer scoring tables, laid out as
    <table_dir>/<cur in lower case>/<PRE>_<CUR>_<POS><suffix>

    returns:

    summed score / length(chain) * 1000, or 0 when the calculated
    protein length is 0

    raises:

    IndexError - if mutated_list names fewer residues than the scored range
    """
    from Bio.PDB.PDBParser import PDBParser
    from pro_length import length
    import os

    # Structure id = file name without a trailing '.pdb'.  The previous
    # PDBfile.rstrip('.pdb') strips any trailing '.', 'p', 'd' or 'b'
    # characters, which corrupts ids such as '1abd.pdb' (-> '1a').
    struct_id = PDBfile
    if struct_id.endswith('.pdb'):
        struct_id = struct_id[:-len('.pdb')]

    pars = PDBParser(PERMISSIVE=1)
    struct = pars.get_structure(struct_id, PDBfile)
    model = struct.child_list[0]
    chain = model.child_list[0]  # first chain of the first model only
    residues = chain.child_list

    total = float(0)
    size = length(chain)

    if end_index > (size - 2):
        end_index = size - 2

    if smoothed == 'y':
        suffix = '.smooth.scr'
    else:
        suffix = '.scr'

    use_mutations = mutated_list != ' '
    if use_mutations:
        mutated_form = mutated_list.split()
    last_offset = end_index - start_index - 1

    for res_index in range(start_index, end_index):
        if use_mutations:
            # mutated score: names come from mutated_list, except for the
            # neighbours just outside the range, which come from the chain
            offset = res_index - start_index
            cur = mutated_form[offset]
            if offset == 0:
                pre = residues[res_index - 1].resname
            else:
                pre = mutated_form[offset - 1]
            if offset == last_offset:
                pos = residues[res_index + 1].resname
            else:
                pos = mutated_form[offset + 1]
        else:
            # non-mutated score: all three names come from the chain
            pre = residues[res_index - 1].resname
            cur = residues[res_index].resname
            pos = residues[res_index + 1].resname

        filename = pre + '_' + cur + '_' + pos + suffix
        table_file = os.path.join(table_dir, cur.lower(), filename)
        chain_index = residues[res_index].id[1]  # residue number in the PDB

        table = load_scores(table_file)
        # load_scores() apparently returns 0 when no table is available for
        # this trimer; such residues contribute nothing to the score
        if table != 0:
            total = total + score_help(chain, chain_index, table)

    try:
        return (total / size) * 1000  # normalize score
    except ZeroDivisionError:
        print("calculated protein length 0 -> returning score 0")
        return 0
def compile_trimers():
    """
    Collects per-trimer data from a batch of PDB chain files

    For the '*.ent' files with list positions sys.argv[1] (inclusive) to
    sys.argv[2] (exclusive) in /home/marciovm/proteins/bigdist, every
    residue trimer (previous, current, next residue name) of the chain
    named by the 5th character of the file name is visited and the values
    returned by calc_all_dihedrals() (defined elsewhere; presumably
    dihedral angles) are appended as one line to

    /home/marciovm/proteins/bdtrimers/<cur in lower case>/<PRE>_<CUR>_<POS>.dat

    side effects:

    changes the working directory, appends to (and creates) .dat files,
    prints progress to stdout
    """
    import os
    import commands
    import sys
    from Bio.PDB.PDBParser import PDBParser
    from pro_length import length

    os.chdir('/home/marciovm/proteins/bigdist')  ##choose directory

    # list of pdb files w/ chain names
    pdb_files = commands.getoutput('ls *.ent').split()

    first_pdb = int(sys.argv[1])
    stop_pdb = int(sys.argv[2])

    aas = [
        'GLY', 'ALA', 'VAL', 'LEU', 'ILE', 'SER', 'THR', 'ASN', 'GLN', 'PHE',
        'TYR', 'TRP', 'CYS', 'MET', 'PRO', 'ASP', 'GLU', 'LYS', 'ARG', 'HIS'
    ]

    for pdb_index in range(first_pdb, stop_pdb):
        pdb = pdb_files[pdb_index]

        pars = PDBParser(PERMISSIVE=1)
        struct = pars.get_structure(pdb, pdb)
        model = struct.child_list[0]
        # 5th character of the file name is the chain id; '.' means the
        # name has no chain id and the blank chain is used
        if pdb[4] == '.':
            chain_name = ' '
        else:
            chain_name = pdb[4].upper()
        chain = model.child_dict[chain_name]

        print('processing ' + pdb + ' index: ' + str(pdb_index))

        residues = chain.child_list

        ##find first real residue
        # The scan stops at the end of child_list: running past it raises
        # IndexError, which the old 'except KeyError' never caught, so a
        # chain made only of non-standard residues aborted the batch.
        index = 0
        while index < len(residues) and residues[index].resname not in aas:
            index += 1
        if index == len(residues):
            print('no standard residue found in ' + pdb + ' - skipping')
            continue
        index += 1  ##should be 2nd real residue

        ##find last real residue
        final_res_index = length(chain) + index - 3  ##2nd to last real residue

        ##look at everything in between
        for aa_index in range(index, final_res_index):
            aacur = residues[aa_index].resname
            aapre = residues[aa_index - 1].resname
            aapost = residues[aa_index + 1].resname
            seq = aapre + '_' + aacur + "_" + aapost + '.dat'
            filename = ('/home/marciovm/proteins/bdtrimers/' +
                        aacur.lower() + '/' + seq)
            try:
                file_out = open(filename, 'a')
            except IOError:
                # presumably no output directory exists for this residue name
                print("IOError line 63, nostandard aa name in PDB")
                continue

            # close the file even if the calculation or the write raises
            try:
                dihedrals = calc_all_dihedrals(chain, aa_index)
                if dihedrals:
                    if len(dihedrals) > 2:
                        values = (dihedrals[0], dihedrals[1],
                                  dihedrals[2], dihedrals[3])
                    else:
                        values = (dihedrals[0], dihedrals[1])
                    # zero-padded to 6 characters, two spaces between columns
                    line = '  '.join([str(value).zfill(6)
                                      for value in values])
                    file_out.write(line + '\n')
            finally:
                file_out.close()