def process_test_data(protein_path='test_protein', ligand_path='test_ligand', o_path='test_output'):
    """Generate an averaged SIFt pattern for every ligand/protein file pair.

    Every file found in ``ligand_path`` is paired with the file of the same
    name in ``protein_path``.  The first structure of each file is read, a
    fingerprint is produced with ``fp_gen1.gen_fp`` and the result is handed
    to ``avg_sift.gen_avg_sift`` together with the target
    ``<o_path>/<complex_id>/pattern.dat``.  ``complex_id`` is the first four
    characters of the file name.

    A ligand is skipped when
      * no file of the same name exists in ``protein_path``,
      * its pattern file already exists under ``o_path``, or
      * fingerprint generation raises an exception.

    :param protein_path: directory containing the protein structure files.
    :param ligand_path: directory containing the ligand structure files.
    :param o_path: root directory given to ``fp_gen1.gen_fp`` and under which
        the pattern files are looked up / written.
    """
    for ligand_file in os.listdir(ligand_path):
        ligand_st_path = os.path.join(ligand_path, ligand_file)
        protein_st_path = os.path.join(protein_path, ligand_file)
        print('%s %s' % (ligand_st_path, protein_st_path))

        if not os.path.exists(protein_st_path):
            # No matching protein file: nothing to pair the ligand with.
            continue

        complex_id = ligand_file[:4]

        # Check for the pattern file where it is actually written (under
        # o_path), and do so before any expensive work is started.
        avg_sift_path = os.path.join(o_path, complex_id, 'pattern.dat')
        if os.path.exists(avg_sift_path):
            print('%s %s' % (complex_id, 'is processed'))
            continue

        ligand_st = structure.StructureReader(ligand_st_path).next()
        protein_st = structure.StructureReader(protein_st_path).next()

        try:
            fp_gen_path = fp_gen1.gen_fp(receptor=protein_st,
                                         binder=ligand_st,
                                         complex_id=complex_id,
                                         root_path=o_path)
        except Exception:
            # Best effort: report the failure and move on.  Without the
            # ``continue`` the code below would use an unbound (or stale,
            # from the previous iteration) ``fp_gen_path``.
            print('%s %s' % (complex_id, 'cannot be processed'))
            continue

        avg_sift.gen_avg_sift(fp_gen_path, avg_sift_path)
def get_avg_matrix(receptor_path='',ligands_path = [],output_fp_path = '',output_pattern_path = ''):
    """Fingerprint several ligands against one receptor and average them.

    The first structure in ``receptor_path`` is loaded into a
    ``fp_gen1.distance_tree``; the first structure of every file listed in
    ``ligands_path`` is then run through ``find_close_residues``.  One line
    per fingerprint key, formatted as
    ``<receptor title>:<key>:<min_res>:<sift string>``, is written to
    ``output_fp_path``, and that file is finally passed to
    ``avg_sift.gen_avg_sift`` with ``output_pattern_path`` as second argument.

    :param receptor_path: receptor structure file; also used as its title.
    :param ligands_path: iterable of ligand structure files (only read here).
    :param output_fp_path: file the fingerprint lines are written to.
    :param output_pattern_path: second argument of ``avg_sift.gen_avg_sift``
        (presumably the averaged pattern output file).
    """
    # Receptor: load, label with its own path, and index it.
    receptor_st = structure.StructureReader(receptor_path).next()
    receptor_st.title = receptor_path
    print('%s %s' % (receptor_path, 'set'))

    tree = fp_gen1.distance_tree()
    tree.set_receptor_structure(receptor_st)
    tree.parse_receptor()

    # Ligands: load all of them first, each labelled with its own path.
    loaded_ligands = []
    for path in ligands_path:
        ligand_st = structure.StructureReader(path).next()
        ligand_st.title = path
        loaded_ligands.append(ligand_st)
        print('%s %s' % (path, 'added'))

    for ligand_st in loaded_ligands:
        tree.find_close_residues(ligand_st)
        print('%s %s' % (ligand_st, 'close residues found'))

    # Dump one colon-separated record per fingerprint key.
    with open(output_fp_path, 'w') as out_fp:
        for key in tree.fingerprints.sifts.keys():
            tree.fingerprints.fill_missing_zeros(tree.min_res, tree.max_res, key)
            sift_string = tree.fingerprints.get_sift_string(key)
            record = tree.receptor.title + ':' + key + ':' + str(tree.min_res) + ':' + sift_string
            out_fp.write(record + '\n')
    print('%s %s %s' % ('finger print', output_fp_path, 'saved'))

    avg_sift.gen_avg_sift(output_fp_path, output_pattern_path)
    print('%s %s %s' % ('pattern', output_pattern_path, 'saved'))
def cal_avg_sift_from_complex(complex_path='data/HL_chain/1RD8-1918/complex.1000.pdb'):
    """Split a complex into antibody/antigen and compute its averaged SIFt.

    ``complex_path`` must consist of exactly four ``/``-separated parts,
    ``<root>/<chain_name>/<complex_id>/<instance_file>``.  Results go to
    ``processed_data/<chain_name>/<complex_id>/<instance_file minus extension>``:

      * ``antigen.pdb`` / ``antibody.pdb`` -- paths handed to
        ``split_protein_protein_complex_manual``,
      * ``fp.out`` -- passed to ``fp_gen1.gen_fp`` as ``fp_path``,
      * ``avg_sift.out`` -- passed to ``avg_sift.gen_avg_sift``.

    If fingerprint generation raises, ``failed`` is printed and the function
    returns without producing the averaged SIFt.

    :param complex_path: path of the complex structure file.
    :raises ValueError: if ``complex_path`` does not have exactly four
        ``/``-separated components.
    """
    print(complex_path)
    _, chain_name, complex_id, instance_name = complex_path.split('/')

    # Strip the real extension instead of blindly cutting four characters,
    # and create the whole directory chain at once so that a missing
    # 'processed_data' root does not make the first mkdir fail.
    instance_stem = os.path.splitext(instance_name)[0]
    p_instance_path = os.path.join('processed_data', chain_name, complex_id, instance_stem)
    if not os.path.isdir(p_instance_path):
        os.makedirs(p_instance_path)

    antigen_path = os.path.join(p_instance_path, 'antigen.pdb')
    antibody_path = os.path.join(p_instance_path, 'antibody.pdb')
    print('%s %s' % (antigen_path, antibody_path))

    antibody, antigen = split_protein_protein_complex_manual(complex_path=complex_path,
                                                             antigen_path=antigen_path,
                                                             antibody_path=antibody_path)
    fp_path = os.path.join(p_instance_path, 'fp.out')
    print('%s %s' % (antibody, antigen))

    try:
        fp_gen_path = fp_gen1.gen_fp(receptor=antibody, binder=antigen, fp_path=fp_path)
    except Exception:
        # Best effort: a failing complex is reported and skipped, but
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        print('failed\n')
        return
    print('good\n')

    sift_path = os.path.join(p_instance_path, 'avg_sift.out')
    print(sift_path)
    avg_sift.gen_avg_sift(fp_gen_path, sift_path)
def cal_avg_sift_from_complex(complex_path='data/HL_chain/1RD8-1918/complex.1000.pdb',output_dir="/home/xiaohan/Desktop",split_fun=globals()['split_protein_protein_complex_manual']):
    """Split a complex into receptor/binder and compute its averaged SIFt.

    All outputs are placed directly in ``output_dir`` (created with a single
    ``os.mkdir`` when missing): ``binder.pdb``, ``receptor.pdb``, ``fp.out``
    and ``avg_sift.out``.

    :param complex_path: path of the complex structure file.
    :param output_dir: directory receiving every output file.
    :param split_fun: callable invoked with the keyword arguments
        ``complex_path``, ``binder_path``, ``receptor_path`` and
        ``chain_info_file``; presumably it writes the two split structures.
        The chain-info file is looked up next to ``complex_path``.
    """
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)

    split_targets = {
        'binder_path': os.path.join(output_dir, 'binder.pdb'),
        'receptor_path': os.path.join(output_dir, 'receptor.pdb'),
    }
    chain_info = os.path.join(os.path.dirname(complex_path), u"新建文本文档.txt")
    split_fun(complex_path=complex_path, chain_info_file=chain_info, **split_targets)

    # Fingerprint of the binder against the receptor, then its average.
    fingerprint_out = os.path.join(output_dir, 'fp.out')
    generated_fp = fp_gen1.gen_fp(receptor_file=split_targets['receptor_path'],
                                  binder_file=split_targets['binder_path'],
                                  fp_path=fingerprint_out)

    averaged_out = os.path.join(output_dir, 'avg_sift.out')
    avg_sift.gen_avg_sift(generated_fp, averaged_out)
def gen_protein_protein_complex_avg_sift(complex_st_path,processed_data_path = 'processed_data'):
    """Compute the averaged SIFt pattern of one protein-protein complex.

    The complex id is the base name of ``complex_st_path`` without its
    extension (as split by ``fileutils.splitext``).  The pattern is written
    to ``<processed_data_path>/<complex_id>/<complex_id>_pattern.dat``; when
    that file already exists the complex is reported as processed and
    nothing else is done.

    :param complex_st_path: path of the complex structure file.
    :param processed_data_path: root directory for the generated data.
    """
    file_name = os.path.basename(complex_st_path)
    complex_id, _unused_ext = fileutils.splitext(file_name)

    pattern_path = '%s/%s/%s_pattern.dat' %(processed_data_path,complex_id,complex_id)
    if os.path.exists(pattern_path):
        print('%s %s' % (complex_id, 'is processed'))
        return

    # Split the complex, fingerprint antigen against antibody, then average.
    receptor_st, binder_st = split_protein_protein_complex_manual(complex_st_path, processed_data_path)
    generated_fp = fp_gen1.gen_fp(receptor=receptor_st,
                                  binder=binder_st,
                                  complex_id=complex_id,
                                  root_path=processed_data_path)
    avg_sift.gen_avg_sift(generated_fp, pattern_path)
if self.used_bits_number != 13: out_sift = "" for chunk in self.sifts[lig_name].sift: for bit in self.active_bits: out_sift.append(chunk.bit_set[bit]) return "".join(out_sift) else: """ concatenate the bit string """ out_sift = [] for fp_chunk in self.sifts[lig_name].sift.keys(): out_sift.append("".join(self.sifts[lig_name].sift[fp_chunk].bit_set)) # for bit in self.sifts[lig_name].sift[fp_chunk].bit_set: # out_sift += str(bit) return "".join(out_sift) if __name__ == "__main__": rec_file = "/home/xiaohan/Downloads/protein/1CE1/1CE1_antibody.pdb" bind_file = "/home/xiaohan/Downloads/protein/1CE1/1CE1_antigen.pdb" from avg_sift import gen_avg_sift cutoff = 4.0 fp_file = "/home/xiaohan/Desktop/1CE1_fp_%d.dat" % (cutoff) pat_file = "/home/xiaohan/Desktop/1CE1_pat_%d.dat" % (cutoff) gen_fp(rec_file, bind_file, fp_file, cutoff=cutoff) gen_avg_sift(fp_file, pat_file)
fp_fp = "%s/fp" %data_root_fp avg_sift_fp = "%s/avg_sift" %data_root_fp ligand_fp = "%s/ligand" %data_root_fp binder_fp= "%s/binder" %data_root_fp #create them prepare_dirs([fp_fp , avg_sift_fp , ligand_fp , binder_fp]) #calculation start for complex_fp in glob.glob(os.path.join(pdb_fp,'*')): name_with_pdb = os.path.split(complex_fp)[-1] complex_id = os.path.split(complex_fp)[-1].split('.')[0] #init paths cur_binder_fp = os.path.join(binder_fp , name_with_pdb) cur_ligand_fp = os.path.join(ligand_fp , name_with_pdb) cur_avg_sift_fp = os.path.join(avg_sift_fp , "%s.sift" %complex_id) cur_fp_fp = os.path.join(fp_fp , "%s.fp" %complex_id) #split complex into ligand and binder split_complex(complex_fp , cur_binder_fp , cur_ligand_fp , has_atom_or_hetatm, ATOM_as_binder) antibody , antigen = load_structure(cur_binder_fp) , load_structure(cur_ligand_fp) #generate finger print fp_gen_path = gen_fp(receptor=antibody,binder = antigen,fp_path= cur_fp_fp)#get the finger print #generate sift gen_avg_sift(cur_fp_fp,cur_avg_sift_fp)#generate average sift