def _exit_with_error(*messages):
    """Print each message on its own line, then exit with status 1."""
    for message in messages:
        print(message)
    sys.exit(1)


def _require_nonempty_file(path):
    """Exit with an error unless *path* exists and is not empty."""
    if not os.path.exists(path) or os.path.getsize(path) == 0:
        _exit_with_error(path + " is empty or does not exist ")


def main():
    """Split a PDB entry into peptide, ligand and receptor files.

    Command line (exactly four arguments after the script name):

        ( --pdbcode CODE | --pdbfile PATH )
        ( carbohydrate | nocarbohydrate )
        ( renumber | original_numbers )

    With ``--pdbcode`` the entry is downloaded to ``CODE.pdb`` in the
    current directory; with ``--pdbfile`` an existing file is used and the
    code is derived from its name by ``process_filename_for_pdb``.

    Files written directly by this function: ``everything.pdb``,
    ``nowaters.pdb`` and ``rec.pdb``.  Peptides and ligands are written by
    ``write_list`` using the prefixes ``pep`` and ``lig``.

    All arguments are validated before any download or file processing
    starts.  Any invalid argument, or a missing/empty input file, prints a
    message and terminates the process with exit status 1.
    """
    if len(sys.argv) != 5:
        _exit_with_error(
            "ERORR: not the right number of arguments",
            "syntax: ~/zzz.scripts/be_blasti.py "
            "( --pdbcode pdbcodename | --pdbfile pdbfilename ) "
            "( carbohydrate| nocarbohydrate ) ( renumber | original_numbers )"
        )

    flag, target, carbohydrate_arg, renumber_arg = sys.argv[1:5]
    carbohydrate_choices = {'carbohydrate': True, 'nocarbohydrate': False}
    renumber_choices = {'renumber': True, 'original_numbers': False}

    # Validate everything up front so that a typo in a later argument does
    # not cost a download first.
    if flag not in ('--pdbcode', '--pdbfile'):
        _exit_with_error(
            "flag is needed.",
            "options: --pdbcode (download from pdb) or --pdbfile "
            "(spesify the file locations)"
        )
    if carbohydrate_arg not in carbohydrate_choices:
        _exit_with_error(
            "ERORR: the second parameter can be carbohydrate or nocarbohydrate"
        )
    if renumber_arg not in renumber_choices:
        _exit_with_error(
            "ERORR: the third parameter can be renumber (renumber residues "
            "or original_numbers (use the curent numbering)",
            " the renumber is recomended. get ride of code for insertion "
            "of residues."
        )
    flag_carbohydrate = carbohydrate_choices[carbohydrate_arg]
    flag_renumber = renumber_choices[renumber_arg]

    if flag == '--pdbcode':
        # A PDB code was given: download the entry from the PDB.
        pdbcode = target
        url = 'http://www.rcsb.org/pdb/files/' + pdbcode + '.pdb'
        pdb_path = pdbcode + ".pdb"
        print("downloading with urllib")
        # Python 2 urllib API; under Python 3 this function lives at
        # urllib.request.urlretrieve.
        urllib.urlretrieve(url, pdb_path)
    else:
        # A file name was given; it should have the form /path/pdbcode.pdb
        pdb_path = target
        pdbcode = process_filename_for_pdb(pdb_path)

    print("pdbcode = " + pdbcode)
    print("file = " + pdb_path)

    # Fail with a clear message instead of handing a missing or empty
    # input to the preprocessing step.
    _require_nonempty_file(pdb_path)

    # Split up the "Alternate location indicator" (ALI), which is not
    # stored in BIO PDB.  This creates 3 files: (1) the original file,
    # (2) everything in common + the ALI mark A, and (3) everything in
    # common + the ALI mark B.  File (2) is used for the following steps
    # and is copied to pdbcode.pdb.
    Preprocess_PDB(pdb_path)

    # Check again: preprocessing rewrites the file used from here on.
    _require_nonempty_file(pdb_path)

    parser = BP.PDBParser()
    struc = parser.get_structure(pdbcode, pdb_path)

    io = BP.PDBIO()
    io.set_structure(struc)
    io.save("everything.pdb")

    surf = BP.get_surface(pdb_path)
    get_structure_stat(struc, surf)

    # Remove waters.
    struc = get_substructure_remove_list(struc, ['HOH'])
    # Remove other small molecules (HETATMs) that are not possible ligands.
    struc = get_substructure_remove_list(struc, notligand_list)
    # Remove carbohydrates unless the caller asked to keep them.
    if not flag_carbohydrate:
        struc = get_substructure_remove_list(struc, carbohydrate_list)

    io.set_structure(struc)
    io.save("nowaters.pdb")

    # If there are multiple models, choose the first model.
    struc = get_structure_one_model(struc)

    # Identify small peptides, write them out, then remove them.
    print("idenifying small peptides")
    peptides_list, peptides_struc_list = get_peptides(struc)
    write_list('pep', 1, peptides_struc_list, surf, io)
    struc = get_receptor(struc, [], peptides_list, -1)

    # Identify ligands and write them out.
    print("idenifying ligands")
    lig_list, lig_struc_list = get_ligs(struc)
    lignum = write_list('lig', 1, lig_struc_list, surf, io)

    # Check whether ligands are attached to, or clash with, a neighbor.
    are_ligands_close(lig_list)

    # Identify a receptor compatible with the ligands/peptides: only
    # chains close to a ligand/peptide are kept as part of the protein.
    # get_receptor keeps chains within a certain distance (7 Angstroms)
    # of the ligand, or the chain closest to the ligand.
    receptor_struc = get_receptor(struc, lig_list, [], lignum)

    # Renumber all chains; each chain will start at one.
    if flag_renumber:
        renumber_residues(receptor_struc)

    io.set_structure(receptor_struc)
    io.save("rec.pdb")
pdbcode = sys.argv[1] #file = '/raid9/tbalius/Projects/ProteomicDOCKing/plversion1/'+pdbcode+'/'+pdbcode+'/'+pdbcode+'.pdb.ori' file = '/raid9/tbalius/Projects/ProteomicDOCKing/plversion1/' + pdbcode + '/' + pdbcode + '/' + pdbcode + '.pdb' if (not os.path.exists(file) or os.path.getsize(file) == 0): print file + " is empty or does not exist " exit() parser = BP.PDBParser() struc = parser.get_structure(pdbcode, file) io = BP.PDBIO() io.set_structure(struc) io.save("everything.pdb") surf = BP.get_surface(file) get_structure_stat(struc, surf) ## remove waters. struc = get_substructure_remove_list(struc, ['HOH']) #struc = get_substructure_remove_waters(struc) io.save("nowaters.pdb") get_structure_stat(struc, surf) ## if multiple models choose the first model. struc = get_structure_one_model(struc) get_structure_stat(struc, surf) ## idenify ligands