def max_cover_pdb(protein, pdbch, top=-1):
    """
    Greedily pick a small subset of PDB chains with the same residue coverage.

    Each round selects the chain that adds the most not-yet-covered residues
    of ``protein``; ties go to the chain that comes first in sorted
    ``(pdb, chain)`` order. Selection stops when no remaining chain adds a
    new residue, or when ``top`` chains have been selected.

    Parameters:
        protein: protein identifier. A mapped residue counts for this protein
            when its mapped string starts with ``protein``.
            NOTE(review): this is a plain prefix test, so an identifier that
            is a prefix of another identifier would also match -- confirm the
            mapped format makes that impossible.
        pdbch: iterable of ``(pdb, chain)`` pairs (tuples or lists).
        top: maximum number of chains to return. The default ``-1`` (or any
            value the result length never reaches) means no limit.

    Returns:
        List of ``(pdb, chain)`` tuples in the order they were selected
        (largest coverage gain first), not in sorted order.
    """
    from map_pdb_res import pdblist_to_uniprot

    # Normalise every entry to a tuple so the later list.remove() also works
    # when the caller passes the pairs as lists.
    remaining = sorted((pdb, ch) for pdb, ch in pdbch)

    # presumably maps "pdb:chain:pos" -> "protein:pos"; only the key prefix
    # and the value prefix are relied upon here.
    pdbmap = pdblist_to_uniprot([pdb for pdb, ch in remaining])

    # Residues of `protein` covered by each chain. This does not change
    # between greedy rounds, so compute it once up front.
    chain_res = {}
    for pdb, ch in remaining:
        prefix = pdb + ":" + ch + ":"
        chain_res[(pdb, ch)] = {
            pdbmap[pdbres]
            for pdbres in pdbmap
            if pdbres.startswith(prefix)
            and pdbmap[pdbres].startswith(protein)
        }

    newlist = []
    covres = set()
    while remaining:
        best_pdbch = None
        best_gain = 0
        for pair in remaining:
            gain = len(chain_res[pair] - covres)
            if gain > best_gain:  # strict: the first best chain wins ties
                best_gain = gain
                best_pdbch = pair
        if best_pdbch is None:
            # No remaining chain adds a residue: maximum coverage reached.
            break
        remaining.remove(best_pdbch)
        covres |= chain_res[best_pdbch]
        newlist.append(best_pdbch)
        if len(newlist) == top:
            break
    return newlist
def combine_pdb_residue(filename, listname=None, outname=None):
    """
    Merge feature vectors of PDB residues that map to the same residue.

    Reads a tab-separated feature file whose first column is
    ``pair,index,residue`` and whose remaining columns are floats. Each
    residue id is translated through ``pdblist_to_uniprot`` and, where the id
    still has the three-field ``pdb:chain:pos`` form, through the
    supplementary list file. Vectors that end up with the same
    ``(pair, residue)`` key are combined by taking the element-wise maximum,
    ignoring NaN entries. The result is written in the same format, with the
    index column set to 0.

    Parameters:
        filename: path of the input feature file.
        listname: path of the supplementary list file (three tab-separated
            columns per line; the second column is looked up by PDB id and
            replaced by the first). Defaults to ``filename`` with ``.fea``
            replaced by ``.map``.
        outname: path of the output file. Defaults to ``filename + '.max'``.

    Returns:
        The path of the output file that was written.
    """
    import math
    from map_pdb_res import pdblist_to_uniprot

    if listname is None:
        listname = filename.replace('.fea', '.map')
    if outname is None:
        outname = filename + '.max'

    data = []
    pdblist = set()
    with open(filename, 'r') as infile:
        for line in infile:
            if not line.strip():
                continue  # tolerate blank lines
            ele = line.rstrip('\r\n').split('\t')
            pp, idx, res = ele[0].split(',')
            pdblist.add(res.split(':')[0])
            data.append((pp, res, [float(val) for val in ele[1:]]))

    res_map = pdblist_to_uniprot(pdblist)

    # Supplementary residue map from the input list: second column -> first.
    sup_map = {}
    with open(listname, 'r') as infile:
        for line in infile:
            if not line.strip():
                continue  # tolerate blank lines
            p, s, c = line.rstrip('\r\n').split('\t')
            sup_map[s] = p

    comb = {}
    for pp, res, vals in data:
        if res in res_map:
            res = res_map[res]
        if res.count(':') == 2:
            pdb, ch, pos = res.split(':')
            if pdb in sup_map:
                res = sup_map[pdb] + ':' + pos
        key = (pp, res)
        if key not in comb:
            comb[key] = vals
            continue
        merged = []
        for i, j in zip(comb[key], vals):
            # NaN never compares equal to anything (including itself), so it
            # must be detected with math.isnan rather than ``==``.
            if math.isnan(i):
                merged.append(j)
            elif math.isnan(j):
                merged.append(i)
            else:
                merged.append(max(i, j))
        comb[key] = merged

    with open(outname, 'w') as outfile:
        for pp, res in sorted(comb):
            fields = ['%s,0,%s' % (pp, res)]
            fields.extend('\t%s' % val for val in comb[(pp, res)])
            fields.append('\n')
            outfile.write(''.join(fields))
    return outname
def map_pdb_residue(filename, listname, useidx=1):
    """
    Map PDB residues to protein residues, keeping the largest value for each.

    Input file format, one tab-separated record per line::

        p1=p2,index,pdb:chain:pos <TAB> value <TAB> others...

    A record is skipped when any column after the first starts with "na"
    (case-insensitive). Each residue id is translated through
    ``pdblist_to_uniprot``; ids it does not know that have the three-field
    ``pdb:chain:pos`` form are translated through the supplementary list
    file instead.

    Parameters:
        filename: path of the input file.
        listname: path of the supplementary list file (three tab-separated
            columns per line; the second column is looked up by PDB id and
            replaced by the first).
        useidx: index of the tab-separated column holding the value
            (default 1, the first column after the identifier).

    Returns:
        Sorted list of ``(p1=p2, residue, max_value)`` tuples.
    """
    from map_pdb_res import pdblist_to_uniprot

    data = []
    pdblist = set()
    with open(filename, 'r') as infile:
        for line in infile:
            if not line.strip():
                continue  # tolerate blank lines
            ele = line.split('\t')
            g1g2, pdbidx, pdbres = ele[0].split(',')
            # Only the value columns can hold NA markers. The identifier
            # column is excluded so that pairs whose name happens to start
            # with "na" are not dropped.
            if any(v.lower().startswith('na') for v in ele[1:]):
                continue
            data.append((g1g2, pdbres, float(ele[useidx].strip())))
            pdblist.add(pdbres.split(':')[0])

    res_map = pdblist_to_uniprot(pdblist)

    # Supplementary residue map from the input list: second column -> first.
    sup_map = {}
    with open(listname, 'r') as listfile:
        for line in listfile:
            if not line.strip():
                continue  # tolerate blank lines
            p, s, c = line.split('\t')
            sup_map[s] = p

    comb = {}
    for pp, res, val in data:
        if res in res_map:
            res = res_map[res]
        elif res.count(':') == 2:
            pdb, ch, pos = res.split(':')
            if pdb in sup_map:
                res = sup_map[pdb] + ':' + pos
        key = (pp, res)
        if key in comb and comb[key] > val:
            continue  # already holding a larger value
        comb[key] = val

    return sorted((pp, res, val) for (pp, res), val in comb.items())