def msai2resi():
    """Write the map between MSA position indices and pdb residues to a file.

    Command-line arguments are read from ``sys.argv``::

        python utils_msa.py msai2resi <msafile> <target header> <pdbfile>

    The map returned by ``msa.getResiTargetMap`` (residue key ->
    (msa index, residue name)) is inverted and written to
    ``<msafile>.<pdbfile>.map``, one ``msai resi resn`` record per line.
    Nothing is written when the map is empty or when two residues share
    the same MSA index.
    """
    # argv[0] is the script and argv[1] the sub-command; the three operands
    # live in argv[2..4], so at least five entries are required.
    if len(sys.argv) < 5:
        print('msai2resi: output the mapping between msa position index and pdb residue number')
        print('example:python utils_msa.py msai2resi PF07714_full.fa BTK_HUMAN 1k2p.pdb\n')
        print('output: PF07714_full.fa.1k2p.pdb.map')
        return

    msafile, target, pdbfile = sys.argv[2:5]
    outfile = msafile + '.' + pdbfile + '.map'
    print('msafile: %s\ntarget header: %s\npdbfile: %s\noutput file: %s'
          % (msafile, target, pdbfile, outfile))

    m = msa(msafile)
    p = protein(pdbfile)
    rtmap = m.getResiTargetMap(p, target)
    if len(rtmap) < 1:
        print('error occoured in generating rtmap')
        return

    # Invert rtmap, e.g. 3128: ('B641', 'R').  The whole map is validated
    # before the output file is opened so a duplicate index cannot leave a
    # half-written file behind.
    trmap = {}
    records = []
    for k in rtmap:
        msai, resn = rtmap[k]
        if msai in trmap:
            print('error. duplicate key [%d] in rtmap' % msai)
            return
        trmap[msai] = (k, resn)
        # The residue key (e.g. 'B641') and residue name are formatted with
        # %s; only the MSA index is formatted as an integer.
        records.append('%d %s %s\n' % (msai, k, resn))

    with open(outfile, 'w') as fout:
        fout.writelines(records)
def main():
    """Load the tip file named on the command line and cluster it.

    The file is loaded with ``protein(path, 'v4', center='TIP')`` and
    ``spectralClustering(6)`` is called on the result.  Without a file
    argument only the usage line is printed.
    """
    args = sys.argv[1:]
    if not args:
        print("Usage: proc_spectralFilter.py pdb(tip)file >> tip_clusters.txt")
        return

    tip_protein = protein(args[0], 'v4', center='TIP')
    tip_protein.spectralClustering(6)
def main():
    """Cut residue ranges out of tip files listed in a description file.

    Usage: ``python proc_extractDomain.py domain_desc_file``

    Every non-empty line of the description file has the form
    ``tip_filename,start,end``.  For each line, ``'a' + tip_filename + '.tip'``
    is loaded and every atom whose ``resSeq`` lies in ``[start, end]``
    (inclusive) is written to ``tip_filename + '.domain'``.
    """
    if len(sys.argv) < 2:
        print("Usage python proc_extractDomain.py domain_desc_file")
        return

    with open(sys.argv[1], 'r') as fin:
        for line in fin:
            line = line.strip()
            if not line:
                # Blank lines (e.g. a trailing newline) carry no record.
                continue
            strArr = line.split(',')
            tip_filename = strArr[0]
            start = int(strArr[1])
            end = int(strArr[2])

            domain_path = tip_filename + '.domain'
            print(domain_path)
            # Load the structure before creating the output file so a
            # failed load does not leave an empty .domain file behind.
            p = protein('a' + tip_filename + '.tip', 'alpha', center='TIP')
            with open(domain_path, 'w') as fo:
                for a in p.atoms:
                    if start <= a.resSeq <= end:
                        fo.write(a.writeAtom())
def main():
    """Load the tip file named on the command line and filter its clusters.

    The file is loaded with ``protein(path, 'v3', center='TIP')`` and
    ``filterClusters()`` is called on the result.  Without a file argument
    only the usage line is printed.
    """
    args = sys.argv[1:]
    if not args:
        print("Usage: proc_SinglePDBFilter.py pdb(tip)file >> tip_clusters.txt")
        return

    tip_protein = protein(args[0], 'v3', center='TIP')
    tip_protein.filterClusters()
def writencg():
    """Write one contact-group line per atom of a pdb file.

    Usage: ``python utils_ncg.py writencg <pdbfile> <size>``

    An ``ncg`` object is created for every atom first; each one is then
    grown against the full atom list and its ``outStr()`` is written as one
    line of ``<pdbfile>.ncg``.
    """
    argv = sys.argv
    if len(argv) < 4:
        print('writencg(): write non parametric contact group matrix for a (coarse-grained) pdb with size cutoff')
        print('writencg(): python utils_ncg.py writencg 1t3r.pdb 3')
        return

    pdbfile = argv[2]
    size = int(argv[3])
    outfile = pdbfile + '.ncg'
    print('writencg(): pdbfile: %s' % pdbfile)
    print('writencg(): ncg size: %d' % size)
    print('writencg(): output: %s' % outfile)

    p = protein(pdbfile)
    # All groups are constructed before any of them is grown.
    groups = [ncg(atom, size) for atom in p.atoms]

    with open(outfile, 'w') as fout:
        for group in groups:
            group.grow(p.atoms)
            fout.write(group.outStr() + '\n')
def main():
    """Print the n-mer residue groups that pass the distance filter.

    Usage: ``proc_N-mer.py tip_file cluster_file n_mer dist_cutoff``

    Each line of ``cluster_file`` is parsed into a ``cgroup``.  Groups of
    exactly ``n_mer`` members are tested directly with
    ``protein.cgResiGroupFilter``; larger groups are expanded into every
    ``n_mer``-sized combination of their members and each combination is
    tested; smaller groups are skipped.  Accepted groups are printed via
    ``getString()``.
    """
    # Four operands are read (argv[1..4]), so five entries are required.
    if len(sys.argv) < 5:
        print("Usage: proc_N-mer.py tip_file cluster_file n_mer dist_cutoff >> n_mer_outfile")
        return

    infile = sys.argv[1]
    infile2 = sys.argv[2]
    n_mer = int(sys.argv[3])
    cutoff = float(sys.argv[4])

    p = protein(infile, center='TIP')
    p.initCGResiMap()

    with open(infile2) as fp:
        for line in fp:
            cg = cgroup(line.strip())
            size = cg.getSize()
            if size < n_mer:
                continue

            if size == n_mer:
                # The filter result is compared with True explicitly because
                # its return type is not visible from this function.
                if p.cgResiGroupFilter(cg, cutoff) == True:
                    print(cg.getString())
                continue

            # Larger group: test every n_mer-sized subset of its members.
            for idx in itertools.combinations(range(size), n_mer):
                sub_cg = cgroup()
                sub_cg.pdb = cg.pdb
                sub_cg.chain = cg.chain
                for i in idx:
                    sub_cg.AAgroup = sub_cg.AAgroup + cg.AAgroup[i]
                    sub_cg.resi.append(cg.resi[i])
                if p.cgResiGroupFilter(sub_cg, cutoff) == True:
                    print(sub_cg.getString())
def ncg2sdiicol():
    """Write the MSA column indices of significant contact groups.

    Usage::

        python utils_msa.py ncg2sdiicol <pdbfile> <ncgfile> <msafile> <sdiifile> <target> <orders>

    ``<orders>`` is a comma-separated list of group orders.  For every order
    ``i`` and every MSA index group produced by ``ncg2msa``, the first ``i``
    indices are sorted and joined with ``-`` to form a key; when that key is
    present in the dictionary returned by ``loadsdii`` the indices are added
    to the output set.  The set is written space-separated to
    ``<pdbfile[0:4]>_<msafile[0:7]>.sdiicol``.
    """
    # Six operands are read (argv[2..7]), so eight entries are required.
    if len(sys.argv) < 8:
        print('ncg2sdiicol: write selected MSA column into .sdiicol file')
        print('python utils_msa.py ncg2sdiicol 1aps_A_1_97.rpdb.tip 1aps_A_1_97.rpdb.tip.ncg PF00708_full.txt.rseq PF00708_full.txt.all_2_sdiii ACYP2_HORSE 2')
        return

    pdbfile = sys.argv[2]   # pdb name
    ncgfile = sys.argv[3]   # contact group file
    msafile = sys.argv[4]   # msa (full or reduced)
    sdiifile = sys.argv[5]  # sdii
    target = sys.argv[6]    # target name
    orderlist = [int(i) for i in sys.argv[7].split(',')]
    outfile = pdbfile[0:4] + '_' + msafile[0:7] + '.sdiicol'

    print('pdbfile :%s' % pdbfile)
    print('ncgfile :%s' % ncgfile)
    print('msafile :%s' % msafile)
    print('sdiifile :%s' % sdiifile)
    print('uniprot name :%s' % target)
    print('ncg order list : [%s]' % repr(orderlist))
    print('outfile: %s' % outfile)

    # MSA as a character matrix; only its shape is reported here.
    m = msa(msafile)
    msaMatrix = np.array([list(s[1]) for s in m.msaArray])
    print('msa matrix: ' + repr(msaMatrix.shape))

    # resi -> msai map, e.g. 'A9' -> (14, 'V')
    p = protein(pdbfile)
    rtmap = m.getResiTargetMap(p, target)
    sdiidict = loadsdii(sdiifile)            # key such as '39-140-210'
    msaGroupArray = ncg2msa(ncgfile, rtmap)  # unsorted index groups

    colset = set()
    for i in orderlist:
        for g in msaGroupArray:
            rg = g[0:i]   # leading i members of the group
            rg.sort()     # keys are built from sorted indices
            sdiikey = '-'.join([str(r) for r in rg])
            if sdiikey not in sdiidict:
                continue
            # Printed as a tuple, exactly as the Python 2 print statement did.
            print(str((sdiikey, sdiidict[sdiikey])))
            colset.update(rg)

    print('ncg2sdiicol():writing %s: %s' % (outfile, repr(colset)))
    with open(outfile, 'w') as fout:
        fout.write(' '.join([str(c) for c in colset]))
def getresset():
    """Print the sequence of a pdb file.

    Usage: ``python utils_sdii.py getresset <pdbfile> <chain>``

    Both operands are forwarded to ``protein(pdbfile, chain)`` and the
    resulting object's ``seq`` attribute is printed.
    """
    # Two operands are read (argv[2] and argv[3]), so four entries are
    # required; the usage line lists the operands the code actually reads.
    if len(sys.argv) < 4:
        print('getresset(): python utils_sdii.py getresset pdbfile chain')
        return

    pdbfile = sys.argv[2]
    chain = sys.argv[3]
    p = protein(pdbfile, chain)
    print(p.seq)
def main():
    """Write a ``.fa`` file for every entry listed in ``pdblist.txt``.

    For each non-empty line ``name`` of ``pdblist.txt`` (read from the
    current directory), ``name + '.tip'`` is loaded with
    ``protein(path, 'fasta')`` and ``writeSeq(name + '.fa')`` is called.
    """
    with open('pdblist.txt', 'r') as fin:
        names = [raw.strip() for raw in fin]

    for name in names:
        if not name:
            # A blank line would otherwise turn into the bogus path '.tip'.
            continue
        p = protein(name + '.tip', 'fasta')
        p.writeSeq(name + '.fa')
def main():
    """Write a ``ca_<name>.pdb`` file for every entry in ``pdblist.txt``.

    For each non-empty line ``name`` of ``pdblist.txt`` (read from the
    current directory), ``name + '.pdb'`` is printed, loaded with
    ``protein(path, 'CA_A')`` and passed through
    ``writeChainACA('ca_' + name + '.pdb')``.
    """
    with open('pdblist.txt', 'r') as fin:
        names = [raw.strip() for raw in fin]

    for name in names:
        if not name:
            # A blank line would otherwise turn into the bogus path '.pdb'.
            continue
        pdb_filename = name + '.pdb'
        print(pdb_filename)
        p = protein(pdb_filename, 'CA_A')
        p.writeChainACA('ca_' + name + '.pdb')
def sdii2resi():
    """Rewrite the MSA indices of an sdii file as pdb residues.

    Usage::

        python utils_msa.py sdii2resi <msafile> <target header> <pdbfile> <sdiifile>

    The residue -> (msa index, residue name) map from
    ``msa.getResiTargetMap`` is inverted.  Every sdii line is split on
    single spaces; its third field (``-``-joined MSA indices) is translated
    through the inverted map and written with the fourth field (the sdii
    value) to ``<sdiifile>_resi``.

    Raises:
        KeyError: if an MSA index in the sdii file has no mapped residue.
    """
    # Four operands are read (argv[2..5]), so six entries are required.
    if len(sys.argv) < 6:
        print('sdii2resi: translate the msa position indices of an sdii file into pdb residue numbers')
        print('example:python utils_msa.py sdii2resi PF07714_full.fa.r50 BTK_HUMAN 1k2p.pdb PF07714_full.fa.r50.3128_3_sdii\n')
        print('output: PF07714_full.fa.r50.3128_3_sdii_resi')
        return

    msafile, target, pdbfile, sdiifile = sys.argv[2:6]
    print('msafile: %s\ntarget header: %s\npdbfile: %s\nsdii file: %s'
          % (msafile, target, pdbfile, sdiifile))

    m = msa(msafile)
    p = protein(pdbfile)
    rtmap = m.getResiTargetMap(p, target)
    if len(rtmap) < 1:
        print('error occoured in generating rtmap')
        return

    # Invert rtmap, e.g. 3128: ('B641', 'R').
    trmap = {}
    for k in rtmap:
        msai, resn = rtmap[k]
        if msai in trmap:
            print('error. duplicate key [%d] in rtmap' % msai)
            return
        trmap[msai] = (k, resn)

    with open(sdiifile) as f:
        sdiilines = f.readlines()

    outfile = sdiifile + '_resi'
    total = len(sdiilines)
    with open(outfile, 'w') as fout:
        # Sample line: 52 [pid:20029] 926-3089-3128 0.001106226720675
        for count, line in enumerate(sdiilines, 1):
            print('%d/%d processed ...' % (count, total))
            strArr = line.strip().split(' ')
            msailist = strArr[2].split('-')
            sdiivalue = strArr[3]
            residues = '-'.join([repr(trmap[int(i)]) for i in msailist])
            fout.write('%s %s\n' % (residues, sdiivalue))

    print('done.\noutput file: [%s]' % outfile)
def writeseq():
    """Write the sequence of a pdb file to ``<pdbfile>.seq``.

    Usage: ``python utils_protein.py writeseq <pdbfile>``

    The output holds the ``seq`` attribute of ``protein(pdbfile)``
    followed by a newline.
    """
    if len(sys.argv) < 3:
        print('writeseq(): write pdb sequence')
        print('writeseq(): python utils_protein.py writeseq 1t3r.pdb')
        print('writeseq(): output: 1t3r.pdb.seq')
        return

    pdbfile = sys.argv[2]
    outfile = pdbfile + '.seq'
    print('writeseq(): pdbfile: %s' % pdbfile)
    print('writeseq(): outfile: %s' % outfile)

    structure = protein(pdbfile)
    with open(outfile, 'w') as fout:
        fout.write(structure.seq + '\n')
def searchpdbseq():
    """Locate the sequence of a pdb file inside an MSA.

    Usage: ``python utils_msa.py searchpdbseq <msafile> <pdbfile>``

    Calls ``msa.searchTargetPDB`` with the loaded protein and prints a
    message when that call returns 0.
    """
    # Two operands are read (argv[2] and argv[3]), so four entries are
    # required.
    if len(sys.argv) < 4:
        print('searchpdbseq: locate pdb sequence in MSA')
        print('example: python utils_msa.py searchpdbseq PF07714_full.fa 1T49_A.pdb\n')
        return

    msafile = sys.argv[2]
    target = sys.argv[3]
    print('msa file: %s' % msafile)
    print('pdb target: %s' % target)

    m = msa(msafile)
    p = protein(target)
    if m.searchTargetPDB(p) == 0:
        print('cannot locate pdb sequence in MSA')
def main():
    """Write a ``.tip`` file for every pdb file named in a list file.

    Usage: ``proc_getTip.py pdblist``

    Every non-empty line of ``pdblist`` is used as a pdb path: it is
    printed, loaded with ``protein(path)`` and passed through
    ``writeChainATips('AAtips.def', path + '.tip')``.
    """
    if len(sys.argv) < 2:
        print('Usage: proc_getTip.py pdblist')
        return

    with open(sys.argv[1], 'r') as fin:
        names = [raw.strip() for raw in fin]

    for pdb_filename in names:
        if not pdb_filename:
            # Blank lines (e.g. a trailing newline) name no file.
            continue
        print(pdb_filename)
        p = protein(pdb_filename)
        p.writeChainATips('AAtips.def', pdb_filename + '.tip')
def resi2target():
    """Report the MSA position that corresponds to one pdb residue.

    Usage::

        python utils_msa.py resi2target <msafile> <target header> <pdbfile> <residue>

    ``<residue>`` is a key such as ``B641``.  The entry of
    ``protein.resDict`` for it is printed, followed by its entry in the map
    returned by ``msa.getResiTargetMap``.

    Returns:
        ``(residue, rtmap[residue][0], rtmap[residue][1])``, or ``None``
        when the usage text was printed or the map is empty.

    Raises:
        KeyError: if the residue key is missing from ``resDict`` or the map.
    """
    # Four operands are read (argv[2..5]), so six entries are required.
    if len(sys.argv) < 6:
        print('resi2target: given a residue number output the corresponding position in target msa')
        print('example:python utils_msa.py resi2target PF07714_full.fa.r50 BTK_HUMAN 1k2p.pdb B641\n')
        return

    msafile, target, pdbfile, tvar = sys.argv[2:6]
    print('msafile: %s\ntarget header: %s\npdbfile: %s\ntarget variable: %s'
          % (msafile, target, pdbfile, tvar))

    m = msa(msafile)
    p = protein(pdbfile)
    print(p.resDict[tvar])

    rtmap = m.getResiTargetMap(p, target)
    if len(rtmap) < 1:
        return

    hit = rtmap[tvar]
    print('map %s: %s' % (tvar, repr(hit)))
    return (tvar, hit[0], hit[1])
def resi2msai():
    """Report the MSA position that corresponds to one pdb residue.

    Usage::

        python utils_msa.py resi2msai <msafile> <target header> <pdbfile> <residue>

    ``<residue>`` is a key such as ``A6``.  The entry of
    ``protein.resDict`` for it is printed, followed by its entry in the map
    returned by ``msa.getResiTargetMap``.

    Returns:
        ``(residue, rtmap[residue][0], rtmap[residue][1])``, or ``None``
        when the usage text was printed or the map is empty.

    Raises:
        KeyError: if the residue key is missing from ``resDict`` or the map.
    """
    # Four operands are read (argv[2..5]), so six entries are required.
    if len(sys.argv) < 6:
        print('resi2msai: given a residue number output the corresponding position in target msa')
        print('python utils_msa.py resi2msai PF00014_full.txt BPT1_BOVIN 5pti_pf.pdb A6')
        return

    msafile, target, pdbfile, tvar = sys.argv[2:6]
    print('msafile: %s\ntarget header: %s\npdbfile: %s\ntarget variable: %s'
          % (msafile, target, pdbfile, tvar))

    m = msa(msafile)
    p = protein(pdbfile)
    print(p.resDict[tvar])

    rtmap = m.getResiTargetMap(p, target)
    if len(rtmap) < 1:
        return

    hit = rtmap[tvar]
    print('map %s: %s' % (tvar, repr(hit)))
    return (tvar, hit[0], hit[1])
def pdbcut():
    """Write the atoms of a residue segment to a ``.rpdb`` file.

    Usage: ``python utils_protein.py pdbcut <pdbfile> <chain|all> <begin>-<end>``

    Atoms whose ``resSeq`` lies in ``[begin, end]`` are kept.  With the
    literal chain ``all`` every chain is accepted; otherwise the atom's
    ``chainID`` must equal the requested chain, ignoring case.  Output goes
    to ``<pdbfile[0:4]>_<chain>_<begin>-<end>.rpdb``.
    """
    if len(sys.argv) < 5:
        print('pdbcut(): write pdb by residue segment')
        print('pdbcut(): python utils_protein.py pdbcut 1t3r.pdb A 5-15')
        print('pdbcut(): python utils_protein.py pdbcut 1t3r.pdb all 5-15')
        return

    pdbfile, chain, rangeStr = sys.argv[2:5]
    bounds = rangeStr.split('-')
    rBegin, rEnd = int(bounds[0]), int(bounds[1])

    pdbname = pdbfile[0:4]
    outfile = '%s_%s_%d-%d.rpdb' % (pdbname, chain, rBegin, rEnd)
    print('pdbcut():pdbfile: %s' % pdbfile)
    print('pdbcut():pdb: %s' % pdbname)
    print('pdbcut():chain: %s' % chain)
    print('pdbcut():residue range: %d - %d' % (rBegin, rEnd))

    p = protein(pdbfile)
    every_chain = (chain == 'all')
    wanted = chain.lower()
    # The range test comes first, so chainID is only inspected for atoms
    # that are inside the segment.
    selected = [
        atom for atom in p.atoms
        if rBegin <= atom.resSeq <= rEnd
        and (every_chain or atom.chainID.lower() == wanted)
    ]

    with open(outfile, 'w') as fout:
        print('pdbcut():output: %s' % outfile)
        print('pdbcut():%d atoms written.' % len(selected))
        for atom in selected:
            fout.write(atom.writeAtom())
def resn2bfactor():
    """Replace every atom's b-factor with a number encoding its residue type.

    The pdb file named in ``sys.argv[2]`` is loaded; each atom's
    ``tempFactor`` is set from a fixed residue-code -> integer table and
    the atom is written to ``<pdbfile minus its last 4 characters>_rb.pdb``.

    Raises:
        KeyError: if ``AAmap.getAAmap`` yields a code absent from the table.
    """
    if len(sys.argv) < 3:
        print('resn2bfactor(): replace b factor values with residue type.')
        print('resn2bfactor(): used for pymol spectrum b')
        return

    # Residue code -> value stored in the b-factor column.
    scoreValue = {
        'X': 0, '-': 0, '.': 0,
        'A': 1, 'C': 2, 'D': 3, 'E': 4, 'F': 5,
        'G': 6, 'H': 7, 'I': 8, 'K': 9, 'L': 10,
        'M': 11, 'N': 12, 'P': 13, 'Q': 14, 'R': 15,
        'S': 16, 'T': 17, 'V': 18, 'W': 19, 'Y': 20,
        'B': 3,
    }

    aamap = AAmap()
    pdbfile = sys.argv[2]
    p = protein(pdbfile)
    outfile = '%s_rb.pdb' % pdbfile[:-4]

    with open(outfile, 'w') as fout:
        for atom in p.atoms:
            code = aamap.getAAmap(atom.resName)
            newBFactor = scoreValue[code]
            print('new b-factor: [%s : %s] -> %d' % (atom.resName, code, newBFactor))
            atom.tempFactor = newBFactor
            fout.write(atom.writeAtom())

    print('Output file: %s' % outfile)
def main():
    """Extract proximity contact groups from a hierarchical clustering.

    Usage: ``python proc_dendrogram.py preffix cutoff``

    ``preffix`` is the path of the pdb file to load and the prefix of every
    output file; ``cutoff`` is the distance threshold.

    Files written:
        ``preffix.resimap``   index, chainID+resSeq and mapped residue name
                              of every atom
        ``preffix.pdist``     Euclidean distance of every atom pair (i < j)
        ``preffix.hcluster``  one line per row of the complete-linkage matrix
        ``preffix.hcg``       one line per cluster still flagged True after
                              the proximity pass
    """
    if len(sys.argv) < 3:
        print('python proc_dendrogram.py preffix cutoff')
        # A bare `exit` expression does nothing; return so execution cannot
        # fall through to the argv reads below.
        return

    preffix = sys.argv[1]
    cutoff = float(sys.argv[2])

    # load tip pdb file
    pr = protein(preffix)
    aamap = AAmap()
    n = len(pr.atoms)

    resimap = {}
    px = []
    print('writing %s.resimap ...' % (preffix))
    with open(preffix + '.resimap', 'w') as fr:
        for count, a in enumerate(pr.atoms):
            px.append((a.x, a.y, a.z))
            resn = aamap.getAAmap(a.resName)
            resimap[count] = ('%s%d' % (a.chainID, a.resSeq), resn)
            fr.write('%d %s%d %s\n' % (count, a.chainID, a.resSeq, resn))
    x = np.array(px)

    # pairwise distances, keyed 'i-j' with i < j
    pdist = {}
    print('writing %s.pdist ...' % (preffix))
    with open(preffix + '.pdist', 'w') as fo:
        for i in xrange(0, len(x)):
            for j in xrange(i + 1, len(x)):
                dist = np.linalg.norm(x[i] - x[j])
                pdist['%d-%d' % (i, j)] = dist
                fo.write('%d-%d : %f\n' % (i, j, dist))

    # for hc extraction
    hcdict = {}     # cluster id -> hc object
    hclist = []     # merged clusters, in linkage order
    existdict = {}  # cluster id -> contact flag

    linkage_matrix = linkage(x, "complete")
    print('writing %s.hcluster ...' % (preffix))
    with open(preffix + '.hcluster', 'w') as fo1:
        for i, row in enumerate(linkage_matrix):
            # Merged clusters are numbered n, n+1, ... after the n leaves.
            hcline = '%d %d %d %f %d' % (n + i, row[0], row[1], row[2], row[3])
            fo1.write(hcline + '\n')
            h = hc(hcline, n)
            hcdict[h.clusterID] = h
            hclist.append(h)

    # resolve leaves for each cluster (done before the single-leaf entries
    # are added to hcdict, as in the original statement order)
    print('resolving leaves ...')
    for h in hclist:
        h.getChildren(hcdict)

    print('iterating clusters for largest proximity contact ...')
    # add every single leaf as its own cluster, flagged True
    for i in xrange(0, n):
        h = hc('%d %d %d 0.0 1' % (i, i, i), n)
        h.leaves = [i]
        hcdict[i] = h
        existdict[i] = True

    # Flags are compared with True/False explicitly because the return
    # types of checkProximity/checkProximity2 are not visible here.
    # NOTE(review): every leaf is pre-seeded above and every processed
    # cluster normally gets a flag, so the 'not in existdict' branches look
    # unreachable if linkage rows only reference leaves or earlier rows
    # - confirm before simplifying.
    for h in hclist:
        if h.dist <= cutoff:
            if h.c1 in existdict and h.c2 in existdict:
                # both children have been checked before
                if existdict[h.c1] == True and existdict[h.c2] == True:
                    ret = checkProximity2(hcdict[h.c1], hcdict[h.c2], pdist, cutoff)
                    existdict[h.clusterID] = ret
                    if ret == True:
                        # combined cluster replaces both children
                        existdict[h.c1] = False
                        existdict[h.c2] = False
                elif existdict[h.c1] == False or existdict[h.c2] == False:
                    existdict[h.clusterID] = False
            elif h.c1 in existdict and h.c2 not in existdict:
                if existdict[h.c1] == False:
                    # c1 is not a contact, so h cannot be one; still flag c2
                    existdict[h.clusterID] = False
                    existdict[h.c2] = checkProximity(hcdict[h.c2], pdist, cutoff)
                elif existdict[h.c1] == True:
                    # c1 is a contact: flag c2, then test c1 and c2 together
                    ret = checkProximity(hcdict[h.c2], pdist, cutoff)
                    existdict[h.c2] = ret
                    if ret == False:
                        existdict[h.clusterID] = False
                    elif ret == True:
                        ret1 = checkProximity2(hcdict[h.c1], hcdict[h.c2], pdist, cutoff)
                        existdict[h.clusterID] = ret1
                        if ret1 == True:
                            existdict[h.c1] = False
                            existdict[h.c2] = False
            elif h.c1 not in existdict and h.c2 in existdict:
                if existdict[h.c2] == False:
                    # c2 is not a contact, so h cannot be one; still flag c1
                    existdict[h.clusterID] = False
                    existdict[h.c1] = checkProximity(hcdict[h.c1], pdist, cutoff)
                elif existdict[h.c2] == True:
                    # c2 is a contact: flag c1, then test c1 and c2 together
                    ret = checkProximity(hcdict[h.c1], pdist, cutoff)
                    existdict[h.c1] = ret
                    if ret == False:
                        existdict[h.clusterID] = False
                    elif ret == True:
                        ret1 = checkProximity2(hcdict[h.c1], hcdict[h.c2], pdist, cutoff)
                        existdict[h.clusterID] = ret1
                        if ret1 == True:
                            existdict[h.c1] = False
                            existdict[h.c2] = False
            elif h.c1 not in existdict and h.c2 not in existdict:
                r1 = checkProximity(hcdict[h.c1], pdist, cutoff)
                existdict[h.c1] = r1
                r2 = checkProximity(hcdict[h.c2], pdist, cutoff)
                existdict[h.c2] = r2
                if r1 == False or r2 == False:
                    existdict[h.clusterID] = False
                elif r1 == True and r2 == True:
                    # NOTE(review): unlike the parallel branches, the flag of
                    # h itself is not stored here - kept as in the original.
                    ret = checkProximity2(hcdict[h.c1], hcdict[h.c2], pdist, cutoff)
                    if ret == True:
                        existdict[h.c1] = False
                        existdict[h.c2] = False
        elif h.dist > cutoff:
            existdict[h.clusterID] = False
            if h.c1 not in existdict:
                existdict[h.c1] = checkProximity(hcdict[h.c1], pdist, cutoff)
            if h.c2 not in existdict:
                existdict[h.c2] = checkProximity(hcdict[h.c2], pdist, cutoff)

    # write every cluster that is still flagged True
    print('writing result into %s.hcg' % preffix)
    count = 0
    with open(preffix + '.hcg', 'w') as fout:
        for hid in existdict:
            if existdict[hid] == True:
                fout.write('%s,%s\n' % (preffix, hcdict[hid].writeLeaves(resimap)))
                count += len(hcdict[hid].leaves)
    print('%d leaves in total\n' % count)
def ncg2blossum():
    """Build a BLOSUM-style substitution matrix from contact-group columns.

    Usage::

        python utils_msa.py ncg2blossum <pdbfile> <ncgfile> <msafile> <sdiifile> <target> <order>

    Steps:
        1. Collect the MSA columns of every contact group whose sorted
           leading ``order`` indices form a key present in the sdii
           dictionary.
        2. Accumulate amino-acid pair counts over those columns with
           ``calcColSM``.
        3. Normalise the counts to pair frequencies ``q``, derive residue
           frequencies ``p`` and expected pair frequencies ``e``, and score
           every pair as ``round(2 * log2(q / e))``.
        4. Save the scores to ``<msafile[0:7]>.sm`` through ``saveBlosum``.
    """
    # Six operands are read (argv[2..7]), so eight entries are required.
    if len(sys.argv) < 8:
        print('ncg2blossum: construct new substitution matrix from contact group')
        print('example:python utils_msa.py ncg2blossum 5pti_pf.pdb 5pti_pf.tip.ncg PF00014_full.txt.rseq PF00014_full.txt.sdii BPT1_BOVIN order')
        print('output: a substitution matrix file (same format as BLOSSUM62)')
        return

    pdbfile = sys.argv[2]   # pdb name
    ncgfile = sys.argv[3]   # contact group file
    msafile = sys.argv[4]   # msa (full or reduced)
    sdiifile = sys.argv[5]  # sdii
    target = sys.argv[6]    # target name
    order = int(sys.argv[7])
    outfile = msafile[0:7] + ".sm"  # new substitution matrix

    # msa in matrix format
    m = msa(msafile)
    msaMatrix = np.array([list(s[1]) for s in m.msaArray])
    print('msa matrix: ' + repr(msaMatrix.shape))

    # resi -> msai map
    p = protein(pdbfile)
    rtmap = m.getResiTargetMap(p, target)
    sdiidict = loadsdii(sdiifile)            # key such as '39-140-210'
    msaGroupArray = ncg2msa(ncgfile, rtmap)  # e.g. [[210, 215], [73, 95, 166]]

    # non-overlapping column indices of the significant groups
    colset = set()
    for g in msaGroupArray:
        rg = g[0:order]  # leading `order` members of the group
        rg.sort()        # keys are built from sorted indices
        sdiikey = '-'.join([str(r) for r in rg])
        if sdiikey not in sdiidict:
            continue
        # Printed as a tuple, exactly as the Python 2 print statement did.
        print(str((sdiikey, sdiidict[sdiikey])))
        colset.update(rg)

    # EBlist is the alphabet order handed to saveBlosum; scores are only
    # computed for the 20 standard residues in AAlist.
    EBlist = ['A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G', 'H', 'I', 'L', 'K',
              'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V', 'B', 'Z', 'X', '*']
    AAlist = sorted(['A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G', 'H', 'I',
                     'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V'])

    # Upper-triangular pair table: key A+B with A <= B alphabetically.
    sm = {}
    for i, A in enumerate(AAlist):
        for B in AAlist[i:]:
            sm[A + B] = 0
    print(AAlist)
    print('Alphabet: %d' % len(AAlist))
    print('AA: %d' % len(sm))

    # accumulate pair counts over every selected column
    print('')
    w = 0  # number of columns
    for col in colset:
        w += 1
        calcColSM(sm, msaMatrix, col)

    n = msaMatrix.shape[0]
    T = w * n * (n - 1) // 2  # total number of residue pairs (normaliser)
    print('w: %d' % w)  # number of columns (contact group)
    print('n: %d' % n)  # number of sequence
    print('T: %d' % T)
    if T == 0:
        # No selected column, or fewer than two sequences: there are no
        # pairs to normalise, and dividing by T would raise.
        print('ncg2blossum(): no residue pairs to count; substitution matrix not written')
        return

    # convert counts cij to frequencies qij (they sum to 1)
    for c in sm:
        sm[c] = 1.0 * sm[c] / T

    # Expected probability of residue i in a pair:
    #   pi = qii + sum( qij/2 ) over ALL j != i
    # Pairs are stored once under the alphabetically ordered key, so both
    # the partners before and after i in AAlist must be visited.
    pi = {}
    for A in AAlist:
        sum_qij = 0.0
        for B in AAlist:
            if B == A:
                continue
            key = A + B if A < B else B + A
            sum_qij += sm[key] / 2
        pi[A] = sm[A + A] + sum_qij
    print(repr(pi))
    print('')

    # expected frequency of every pair
    eij = {}
    for i, A in enumerate(AAlist):
        eij[A + A] = pi[A] * pi[A]
        for B in AAlist[i + 1:]:
            eij[A + B] = 2 * pi[A] * pi[B]
    print(len(eij))
    print(repr(eij))
    print('')

    # log odds ratio sij = round(2*log2(qij/eij)); unseen pairs score 0
    sij = {}
    for i, A in enumerate(AAlist):
        for B in AAlist[i:]:
            if eij[A + B] == 0.0 or sm[A + B] == 0.0:
                sij[A + B] = 0
            else:
                sij[A + B] = int(round(2 * math.log((sm[A + B] / eij[A + B]), 2)))
    print(repr(sij))
    print(len(sij))
    print('')

    saveBlosum(EBlist, sij, outfile)
def main():
    """Print ``1t3r.pdb`` and write its tip file.

    The structure is loaded from the hard-coded path ``1t3r.pdb``, printed
    with ``printPDB()`` and passed to ``writeChainATips`` with the
    definition file ``AAtips.def``; the output name is the object's ``pdb``
    attribute plus ``.tip``.
    """
    structure = protein('1t3r.pdb')
    structure.printPDB()
    tip_path = structure.pdb + '.tip'
    structure.writeChainATips('AAtips.def', tip_path)