Example #1
0
 def __init__(self, params):
     #
     # Initialise
     #
     self.params = params
     import pKaTool.pKaIO as pKaIO
     self.pdbfile = self.params['pdb']
     self.MCsteps = int(self.params['MCsteps'])
     self.pHstep = float(self.params['pHstep'])
     self.pHstart = float(self.params['pHstart'])
     self.pHend = float(self.params['pHend'])
     IO = pKaIO.pKaIO(self.pdbfile)
     #
     # Define the tmpdir for all our calcs
     #
     import os
     self.topdir = os.path.split(self.pdbfile)[0]
     self.topdir = os.path.join(
         self.topdir, '%s_autonomy' % os.path.split(self.pdbfile)[1])
     if not os.path.isdir(self.topdir):
         os.mkdir(self.topdir)
     #
     # Did we find a completed pKa calculation for the PDB file
     #
     if IO.calculation_completed:
         #
         # All files found
         #
         print 'pKa calculation files found'
     else:
         print
         print 'I could not find a completed pKa calculation for the specified PDB file'
         print 'Please complete the pKa calculation first'
         print
         raise Exception()
     #
     # Get the wild type titration curves calculated with WHAT IF
     #
     import pKaTool.pKaIO
     IO = pKaTool.pKaIO.pKaIO(self.pdbfile)
     self.wt_titcurv = IO.read_titration_curve()
     self.wt_pKas = IO.readpka()
     #
     # Recalculate titration curves with the CPP algorithm
     #
     import os
     dirname = os.path.join(self.topdir, 'wt_recalc')
     if not os.path.isdir(dirname):
         os.mkdir(dirname)
     filename = os.path.join(dirname, 'WT.DAT')
     data = None
     if os.path.isfile(filename):
         try:
             fd = open(filename)
             import cPickle
             data = cPickle.load(fd)
             fd.close()
             self.wtpkas = data['pkas']
             self.wt_titcurv = data['titcurv']
             print 'Loaded wild type recalculated titration curves'
             print 'Loaded %d titration curves' % (len(
                 self.wt_titcurv.keys()))
         except:
             data = None
     #
     # If we don't have the data calculate it
     #
     if data is None:
         import pKa_MC
         print 'Recalculating wild type titration curves'
         pKa_params = {
             'pHstart': self.pHstart,
             'pHstop': self.pHend,
             'pHstep': self.pHstep,
             'pKMCsteps': self.MCsteps,
             'verbose': 1
         }
         self.pKaCALC = pKa_MC.pKa_calculation_class(self.pdbfile,
                                                     pKa_info=None,
                                                     params=pKa_params,
                                                     parent=self)
         self.pKaCALC.set_MC_CPP()
         self.pKaCALC.set_reporter_groups(self.wt_pKas.keys())
         self.wtpkas, self.wt_titcurv = self.pKaCALC.calc_wt_pkas()
         #
         # Save the data
         #
         fd = open(filename, 'w')
         import cPickle
         data = cPickle.dump(
             {
                 'pkas': self.wtpkas,
                 'titcurv': self.wt_titcurv
             }, fd)
         fd.close()
         print 'done'
         print
         print 'Calculated %d titration curves' % (len(
             self.wt_titcurv.keys()))
     #
     # Get the in_system energies
     #
     self.in_data = self.calculate_insystem_dpKas()
     #
     # Get the ex_data
     #
     self.ex_data = self.calculate_exsystem_dpKas()
     return
Example #2
0
def main():
    #
    # Do all the analyses we want
    #
    print
    print 'Design_plots.py: Do all analyses of the Design_pKa runs'
    print
    print 'Usage Design_plots.py [files] <type>'
    print
    #
    # dpKa vs. distance from active site & number of mutations
    #
    import sys
    #
    # Get the type
    #
    type=sys.argv[-1]
    if type=='two':
        #
        # Analysing a single group
        #
        files=get_files(sys.argv[1:-2],type,sys.argv[-2])
    else:
        #
        # Get the files
        #
        files=get_files(sys.argv[1:-1],type)
    #
    # If not files then exit
    #
    if files==[]:
        print 'Error: Did not find any files to match criteria'
        return
    #
    # Prepare the data matrix
    #
    raw_data={}
    max_dist=25
    max_muts=20
    distance_range=range(max_dist+1)
    nummuts_range=range(1,max_muts+1)
    for num in nummuts_range:
        raw_data[num]={}
        for dist in distance_range:
            raw_data[num][dist]=[0.0]
    
    #
    # Loop over all the files
    #
    added=0
    big_dict={}
    tot_target={}
    for file in files:
        if file[-5:]=='.lock':
            continue
        print 'Processing %s' %file
        try:
            import pickle
            fd=open(file)
            d=pickle.load(fd)
            fd.close()
        except:
            continue
        #
        # Set the prefix
        #
        prefix=get_prefix(file)
        #
        # -----------------------------------
        #
        # Loop over all the design-data
        #
        targets=d.keys()
        targets.sort()
        for target in targets:
            #if target!=':0231:ASP':
            #    continue
            #
            # pdbfile and wt_full are not interesting
            #
            if target=='pdbfile' or target=='wt_full':
                continue
            target_pka=d[target]
            designs=target_pka.keys()
            designs.sort()
            if designs==['pKa out of range']:
                continue
            #
            # Loop over each design (normally +20 and -20 for Design_dist_nummuts)
            #
            for design in designs:
                #if design!='m20':
                #    continue
                try:
                    nummuts=target_pka[design].keys()
                except:
                    #print 'Skipping:',target_pka[design]
                    continue
                nummuts.sort()
                for num in nummuts:
                    dist_cutoffs=target_pka[design][num].keys()
                    for cutoff in dist_cutoffs:
                        #text='%15s %4s #muts: %2d, dist_co: %5.2f, sols:' %(target,design,num,float(cutoff))
                        #print text
                        #
                        # Make sure we have a bin for the this distance cutoff
                        #
                        #if not raw_data[num].has_key(cutoff):
                        #    raw_data[num][cutoff]=[]
                        #
                        # Loop over all solutions and store the dpKa values
                        #
                        sol_dict=target_pka[design][num][cutoff]
                        solutions=sol_dict.keys()
                        #
                        # Loop over all the solutions
                        #
                        for sol in solutions:
                            if sol_dict[sol].has_key(type):
                                dpka=sol_dict[sol][type][target]
                                mutations=sol_dict[sol]['mutations']
                                #
                                # Count the number of mutations
                                #
                                nums=0
                                for mut in mutations:
                                    if mut:
                                        nums=nums+1
                                #
                                # Add the data to the array
                                #
                                # We skip all data points outside the range specified
                                # by max_muts and max_dist
                                #
                                skip=None
                                if not raw_data.has_key(nums):
                                    skip=1
                                if not skip:
                                    if not raw_data[nums].has_key(cutoff):
                                        skip=1
                                if not skip:
                                    raw_data[nums][cutoff].append(dpka)
                                #
                                # Add to the big dictionary
                                #
                                import os
                                
                                tname=prefix+target
                                if not big_dict.has_key(tname):
                                    big_dict[tname]=[]
                                clean_muts=[]
                                for mut in mutations:
                                    if mut:
                                        clean_muts.append(mut)
                                big_dict[tname].append([clean_muts,dpka])

                                #
                                # Keep track of how many we add
                                #
                                added=added+1
                                #print 'Adding: nummuts: %2d, cutoff: %4.1f, dpka: %4.2f' %(nums,cutoff,dpka)
                                #except:
                                #    pass
            #print '--------------------'
    #
    # Read the definition of the active site
    #
    act_site=read_actsit_def()
    #
    # Get properties from the PDB files/wt pKa calculation
    #
    import string, os
    for file in files:
        if file[-5:]=='.lock':
            continue
        prefix=get_prefix(file)
        #
        # Analysis
        #
        print 'Analysing for %s' %prefix
        #
        # Read the PDB file
        #
        
        pdbfile=os.path.join(basedir,prefix[:4],prefix)
        import Protool
        Z=Protool.structureIO()
        Z.readpdb(pdbfile)
        #
        # Get the relative accs
        #
        import WI_tools
        accs=WI_tools.relative_accessibility(pdbfile)
        #
        # Open the wt pKa calc
        #
        import pKaTool.pKaIO as pKaIO
        X=pKaIO.pKaIO(pdbfile)
        pkavals=X.readpka()
        matrix=X.read_matrix()
        for residue in pkavals.keys():
            target=prefix+residue
            if not tot_target.has_key(target):
                tot_target[target]={}
            tot_target[target]['pKa']=pkavals[residue]['pKa']
            elecs=[]
            for other_res in matrix[residue].keys():
                elecs.append(matrix[residue][other_res][0])
            tot_target[target]['elecs']=elecs[:]
            #
            # Insert number of aas
            #
            tot_target[target]['prot_aas']=len(Z.residues.keys())
            #
            # Is this target in the vicinity of the active site?
            #
            tot_target[target]['act_site']=None
            target_res=target.split('pdb')[1]
            target_res=':'+target_res.split(':')[1]
            try:
                target_atoms=Z.residues[target_res]
            except:
                print target_res
                print Z.residues.keys()
                stop
            if act_site.has_key(prefix):
                for act_res in act_site[prefix]:
                    r_act_res=':'+act_res.split(':')[1]
                    for atom2 in Z.residues[r_act_res]:
                        for target_atom in target_atoms:
                            #print 'Comparing',target_atom,atom2
                            if Z.distance(target_atom,atom2)<5.0:
                                tot_target[target]['act_site']='Yes'
            #
            # Insert rel. acc
            #
            if residue[-6:]==':CTERM':
                residue=residue[:-6]
            if residue[-6:]==':NTERM':
                residue=residue[:-6]
            #print accs[residue]['sum']
            tot_target[target]['relacc']=accs[residue]['sum']['rel']
            #print residue,accs[residue]
    print
    print ' All done'
    #
    # How many solutions in total?
    #
    print 'I added %5d solutions to the matrix' %added
    #
    # For each target, what's the maximum dpKa?
    #
    targets=big_dict.keys()
    targets.sort()
    max_dpkas=[]
    all=[]
    actsite_dpkas=[]
    all_actsite_dpkas=[]
    file_dpkas={}
    for target in targets:
        tmp_dpkas=[]
        for solution,dpka in big_dict[target]:
            tmp_dpkas.append(abs(dpka))
            if not file_dpkas.has_key(target[:4]):
                file_dpkas[target[:4]]={}
                file_dpkas[target[:4]]['dpkas']=[]
                file_dpkas[target[:4]]['num_target']=0
                file_dpkas[target[:4]]['max_dpka']=0.0
            #
            # Add the new dpKa
            #
            file_dpkas[target[:4]]['dpkas'].append(abs(dpka))
            
        avg,var,sdev=average(tmp_dpkas)
        print 'Average pKa shift for %25s is %5.2f (%5.2f)' %(target,avg,sdev)
        tmp_dpkas.sort()
        max_dpka=tmp_dpkas[-1]
        max_dpkas.append(max_dpka)
        all=all+tmp_dpkas
        #
        # Store the average and max dpka for each target
        #
        tot_target[target]['avg_dpka']=avg
        tot_target[target]['max_dpka']=max_dpka
        #
        # Set the aa size
        #
        file_dpkas[target[:4]]['prot_aas']=tot_target[target]['prot_aas']
        #
        # Increment the number of targets designed for this protein
        #
        file_dpkas[target[:4]]['num_target']=file_dpkas[target[:4]]['num_target']+1
        #
        # Is is an active site target?
        #
        if tot_target[target]['act_site']:
            actsite_dpkas.append(max_dpka)
            all_actsite_dpkas=all_actsite_dpkas+tmp_dpkas
    #
    # Write the PDB files
    #
    for file in files:
        tf_max=[]
        if file[-5:]=='.lock':
            continue
        prefix=get_prefix(file)
        #
        # Writing Yasara script
        #
        print 'Writing Yasara script for %s' %prefix
        #
        # Read the PDB file
        #
        fd=open('yasara.mcr','w')
        pdbfile=os.path.join(basedir,prefix[:4],prefix)
        fd.write('LoadPDB %s\n' %pdbfile)
        fd.write('ColorAll 606060\n')
        fd.write('Style Stick\n')
        fd.write('HUD Off\n')
        fd.write('HideRes Hoh\n')
        import Protool
        Z=Protool.structureIO()
        Z.readpdb(pdbfile)
        #
        # Zero all B-factors
        #
        #for residue in Z.residues.keys():
        #    for atom in Z.residues[residue]:
        #        Z.atoms[atom]['B-factor']=0.0
        #
        # Loop over all targets and set the colour
        #
        colors={1.0:'Blue',
                2.0:'Cyan',
                3.0:'Green',
                4.0:'Yellow',
                5.0:'Red'}
        for target in tot_target.keys():
            #
            # Collect stats on the max abs(dpka)
            #
            pos=target.find(prefix)
            if pos!=-1:
                if tot_target[target].has_key('max_dpka'):
                    tf_max.append(abs(tot_target[target]['max_dpka']))
            #
            # Write the PDB file
            #
            if pos!=-1:
                resnum=target[pos+len(prefix):]
                resnum=':'+resnum.split(':')[1]
                if Z.residues.has_key(resnum):
                    if tot_target[target].has_key('max_dpka'):
                        col_cutoff=colors.keys()
                        col_cutoff.sort()
                        co=0.5
                        for col in col_cutoff:
                            co=col
                            if tot_target[target]['max_dpka']<col:
                                break
                        colour=colors[co]
                        fd.write('ColorRes %d,%s\n' %(int(resnum[1:]),colour))
                    else:
                        fd.write('ColorRes %d,%s\n' %(int(resnum[1:]),'aaaaaa'))
                else:
                    raise 'Residue not found',target
        #Z.writepdb('BF_dpKa')
        print 'Number of max_pkas in %s is %d' %(prefix,len(tf_max))
        avg,var,sdev=average(tf_max)
        print '%s, average max dpKa %5.1f, sdev: %5.1f' %(prefix,avg,sdev)
    #fd.write('exit\n')
    fd.close()
    #
    # Print all the stats
    #
    print
    print 'Number of targets designed is       : %4d  ' %(len(targets))
    all_targets=len(tot_target.keys())
    print 'Number of targets in total:         : %4d  ' %all_targets
    print '%% designed                         : %5.2f' %(float(len(targets))/float(all_targets)*100.0)
    print
    print 'Number of active site targets       : %5.2f' %(len(actsite_dpkas))
    #
    # Get average Delta pKas
    #
    avg,var,sdev=average(all)
    print 'Average dpKa for all targets is      : %5.2f (%5.2f)' %(avg,sdev)
    avg,var,sdev=average(max_dpkas)
    print 'Average MAX dpKa for all targets is  :%5.2f  (%5.2f)' %(avg,sdev)
    # Max dpka for active sites
    avg,var,sdev=average(actsite_dpkas)
    print 'Average MAX dpKa for active site targets: %5.2f (%5.2f)' %(avg,sdev)
    #
    avg,var,sdev=average(all_actsite_dpkas)
    print 'Average dpKa for actsit target   :%5.2f  (%5.2f)' %(avg,sdev)
    print
    print 'Average dpKa per protein'
    prots=file_dpkas.keys()
    prots.sort()
    for prot in prots:
        avg,var,sdev=average(file_dpkas[prot]['dpkas'])
        num_target=file_dpkas[prot]['num_target']
        aa_size=file_dpkas[prot]['prot_aas']
        num_sol=len(file_dpkas[prot]['dpkas'])
        print 'Average dpKa for %s is               : %5.2f (%5.2f) [#targets %4d, #aas %4d, #sols/target %5.2f]' %(prot,avg,sdev,num_target,aa_size,float(num_sol)/float(num_target))
    #
    # Stats on the types of targets designed
    #
    designed={}
    import pKarun
    Y=pKarun.pKanalyse()
    for target in big_dict.keys():
        rtype=Y.get_residue_type(target)
        if not designed.has_key(rtype):
            designed[rtype]=0
        designed[rtype]=designed[rtype]+1
    des=designed.keys()
    #
    # Look at the targets not designed
    #
    not_designed={}
    all_targets=tot_target.keys()
    all_targets.sort()
    import pKarun
    Y=pKarun.pKanalyse()
    for target in all_targets:
        if not big_dict.has_key(target):
            rtype=Y.get_residue_type(target)
            if not not_designed.has_key(rtype):
                not_designed[rtype]=0
            not_designed[rtype]=not_designed[rtype]+1
    #
    # Stats
    #
    print
    print 'Stats on types of groups designed'
    types=['ASP','GLU','TYR','CYS','CTERM','NTERM','LYS','ARG','HIS']
    types.sort()
    for rtyp in types:
        if designed.has_key(rtyp):
            des=designed[rtyp]
        else:
            des=0
        if not_designed.has_key(rtyp):
            ndes=not_designed[rtyp]
        else:
            ndes=0
        tot=ndes+des
        if tot>0:
            avg='%5.2f' %(float(des)/float(tot)*100.0)
        else:
            avg='NA'
        print '%8s des: %3d notD: %3d, tot: %3d %% designed: %s' %(rtyp,des,ndes,tot,avg)
    #
    # Relation between average dpKa obtained and accessibility, type and electrostatic interactions.
    #
    print
    #
    # Plot of avg dpKa vs. sum of abs electrostatic interactions
    #
    avg_dpka=[]
    max_dpka=[]
    sum_elec=[]
    acc=[]
    for target in all_targets:
        dpkas=[]
        if big_dict.has_key(target):
            for mutants,dpka in big_dict[target]:
                dpkas.append(abs(dpka))
            e_sum=[]
            for elec in tot_target[target]['elecs']:
                e_sum.append(elec)
            #
            max_dpka.append(max(dpkas))
            #
            avg,var,sdev=average(dpkas)
            avg_dpka.append(avg)
            #
            avg,var,sdev=average(e_sum)
            sum_elec.append(get_sum(e_sum))
            #
            # Accessibility
            #
            acc.append(tot_target[target]['relacc'])
        else:
            #print 'No design for',target
            pass
    import dislin_driver
    file=dislin_driver.graf_mult2(acc,[avg_dpka,max_dpka],
                                  title='Effect of solvent exposure',
                                  x_legend='Relative accessibility of target',
                                  y_legend='abs(dpKa)',
                                  legends=['Avg. dpKa','Max. dpKa'])
    #os.system('eog %s' %file)
    #
    # Any difference for active site targets?
    #
    
    #
    # Plot it
    #
    nummuts={}
    nums=raw_data.keys()
    nums.sort()
    for num in nums:
        for co in raw_data[num].keys():
            max_val=-1.0
            sum=0.0
            count=0
            for dpka in raw_data[num][co]:
               if abs(dpka)>max_val:
                    max_val=abs(dpka)
               if dpka>0.01:
                   sum=sum+abs(dpka)
                   count=count+1
               #
               # Sort as function of number of mutations for other stat
               #
               if not nummuts.has_key(num):
                   nummuts[num]=[]
               nummuts[num].append(abs(dpka))
            if count==0:
                raw_data[num][co]=0
            else:
                raw_data[num][co]=float(sum)/float(count)
            #raw_data[num][co]=max_val
    import dislin_driver
    #dislin_driver.colour_2D(raw_data,'','','# of mutations','distance from target (A)','abs(dpKa)','dpka.tif')
    import os
    #os.system('gimp dpka.tif')
    #
    # Get dpKa as a function of # of mutants
    #
    #nums=nummuts.keys()
    #nums.sort()
    #x=[]
    #y=[]
    #for num in nums:
    #    for dpka in nummuts[num]:
    #        x.append(num)
    #        y.append(dpka)
    #file=dislin_driver.graf_mult2(x,[y],
    #                              title='dpKa, number of mutations',
    #                              x_legend='Number of mutations',
    #                              y_legend='abs(dpKa)')
    #os.system('gimp %s' %file)
    #
    # Save bigdict
    #
    fd=open('/home/nielsen/pKa-design/done_distnummuts/bigdict','w')
    import pickle
    pickle.dump(big_dict,fd)
    fd.close()
Example #3
0
 def __init__(self,params):
     #
     # Initialise
     #
     self.params=params
     import pKaTool.pKaIO as pKaIO
     self.pdbfile=self.params['pdb']
     self.MCsteps=int(self.params['MCsteps'])
     self.pHstep=float(self.params['pHstep'])
     self.pHstart=float(self.params['pHstart'])
     self.pHend=float(self.params['pHend'])
     IO=pKaIO.pKaIO(self.pdbfile)
     #
     # Define the tmpdir for all our calcs
     #
     import os
     self.topdir=os.path.split(self.pdbfile)[0]
     self.topdir=os.path.join(self.topdir,'%s_autonomy' %os.path.split(self.pdbfile)[1])
     if not os.path.isdir(self.topdir):
         os.mkdir(self.topdir)
     #
     # Did we find a completed pKa calculation for the PDB file
     #
     if IO.calculation_completed:
         #
         # All files found
         #
         print 'pKa calculation files found'
     else:
         print
         print 'I could not find a completed pKa calculation for the specified PDB file'
         print 'Please complete the pKa calculation first'
         print
         raise Exception()
     #
     # Get the wild type titration curves calculated with WHAT IF
     #
     import pKaTool.pKaIO
     IO=pKaTool.pKaIO.pKaIO(self.pdbfile)
     self.wt_titcurv=IO.read_titration_curve()
     self.wt_pKas=IO.readpka()
     #
     # Recalculate titration curves with the CPP algorithm
     #
     import os
     dirname=os.path.join(self.topdir,'wt_recalc')
     if not os.path.isdir(dirname):
         os.mkdir(dirname)
     filename=os.path.join(dirname,'WT.DAT')
     data=None
     if os.path.isfile(filename):
         try:
             fd=open(filename)
             import cPickle
             data=cPickle.load(fd)
             fd.close()
             self.wtpkas=data['pkas']
             self.wt_titcurv=data['titcurv']
             print 'Loaded wild type recalculated titration curves'
             print 'Loaded %d titration curves' %(len(self.wt_titcurv.keys()))
         except:
             data=None
     #
     # If we don't have the data calculate it
     #
     if data is None:
         import pKa_MC
         print 'Recalculating wild type titration curves'
         pKa_params={'pHstart':self.pHstart,'pHstop':self.pHend,'pHstep':self.pHstep,'pKMCsteps':self.MCsteps,'verbose':1}
         self.pKaCALC=pKa_MC.pKa_calculation_class(self.pdbfile,pKa_info=None,params=pKa_params,parent=self)
         self.pKaCALC.set_MC_CPP()
         self.pKaCALC.set_reporter_groups(self.wt_pKas.keys())
         self.wtpkas,self.wt_titcurv=self.pKaCALC.calc_wt_pkas()
         #
         # Save the data
         #
         fd=open(filename,'w')
         import cPickle
         data=cPickle.dump({'pkas':self.wtpkas,'titcurv':self.wt_titcurv},fd)
         fd.close()
         print 'done'
         print
         print 'Calculated %d titration curves' %(len(self.wt_titcurv.keys()))
     #
     # Get the in_system energies
     # 
     self.in_data=self.calculate_insystem_dpKas()
     #
     # Get the ex_data
     #
     self.ex_data=self.calculate_exsystem_dpKas()
     return