def __init__(self,path,mode="score"):
    """Parse a PSSM file from BLAST into a usable datastructure

    Fills two dictionaries keyed by the integer residue number found in
    the first column of each data line:
      self.native_sequence -- residue number -> residue id (second column)
      self.pssmmap         -- residue number -> {header name: int value}

    path -- file to read (opened through fileutil.universal_open)
    mode -- "score" to read the 20 columns following the residue id,
            "percent" to read the 20 columns after those

    Raises ValueError if mode is neither "score" nor "percent".
    """
    self.pssmmap = {}
    self.native_sequence = {}

    # Field range of a split data line for each mode; fields 0 and 1 hold
    # the residue number and the residue id.
    if mode == "score":
        first,last = 2,22
    elif mode == "percent":
        first,last = 22,42
    else:
        # previously an unknown mode left the data slice undefined
        raise ValueError("mode must be 'score' or 'percent', got %r" % (mode,))

    pssmfile = fileutil.universal_open(path,'r')
    try:
        # two lines precede the column header
        pssmfile.readline()
        pssmfile.readline()
        # Keep exactly 20 column names. In the standard PSI-BLAST ASCII
        # layout the header repeats the residue names for the percentage
        # columns, so a 21-wide slice paired the repeated first name with
        # the first percentage field and overwrote the first score.
        header = pssmfile.readline().split()[0:20]
        for line in pssmfile:
            fields = line.split()
            if not fields:
                break # the first blank line ends the matrix
            res_num = int(fields[0])
            self.native_sequence[res_num] = fields[1]
            line_map = {}
            for resname,score in zip(header,fields[first:last]):
                line_map[resname] = int(score)
            self.pssmmap[res_num] = line_map
    finally:
        # close the file even when a line fails to parse
        pssmfile.close()
def __init__(self,filename):
    """load a param file into a data structure

    Every non-blank line is split on whitespace and handled according to
    its first field:
      NAME, IO_STRING, TYPE, AA, NBR_ATOM, NBR_RADIUS
          -- the second field is stored (as a string) on self.name,
             self.io_string, self.type, self.aa, self.nbr_atom and
             self.nbr_radius respectively
      ATOM           -- builds an Atom from fields 1-4, stored in
                        self.atoms under the atom name (field 1)
      BOND           -- builds a Bond from fields 1-2, appended to
                        self.bonds and added to both atoms it names
      ICOOR_INTERNAL -- builds an icoor from fields 1-7, stored in
                        self.icoors under field 1
    Lines with any other first field are ignored.
    """
    self.atoms = {}
    self.bonds = []
    self.icoors = {}
    # tags whose second field is copied straight onto an attribute
    simple_tags = {
        "NAME": "name",
        "IO_STRING": "io_string",
        "TYPE": "type",
        "AA": "aa",
        "NBR_ATOM": "nbr_atom",
        "NBR_RADIUS": "nbr_radius",
    }
    infile = fileutil.universal_open(filename,'r')
    try:
        for line in infile:
            fields = line.split()
            # Test the split result: a raw line always contains at least
            # its newline, so checking len(line) never caught blank lines
            # and they crashed on fields[0].
            if not fields:
                continue
            tag = fields[0]
            if tag in simple_tags:
                setattr(self,simple_tags[tag],fields[1])
            elif tag == "ATOM":
                self.atoms[fields[1]] = Atom(fields[1],fields[2],fields[3],fields[4])
            elif tag == "BOND":
                current_bond = Bond(fields[1],fields[2])
                self.bonds.append(current_bond)
                # both atoms must already have been declared by ATOM lines
                self.atoms[fields[1]].add_bond(current_bond)
                self.atoms[fields[2]].add_bond(current_bond)
            elif tag == "ICOOR_INTERNAL":
                self.icoors[fields[1]] = icoor(fields[1],fields[2],fields[3],fields[4],fields[5],fields[6],fields[7])
    finally:
        # close the file even when a line fails to parse
        infile.close()
def load_pdb(path):
    """return a biopython structure object given a pdb file path

    The file is opened through fileutil.universal_open and parsed with a
    permissive PDBParser.  The first four characters of path are used as
    the structure id.
    """
    parser = PDBParser(PERMISSIVE=1)
    pdb_file = fileutil.universal_open(path, 'rU')
    try:
        return parser.get_structure(path[0:4], pdb_file)
    finally:
        # close the file even when the parser raises
        pdb_file.close()
def __init__(self, path, mode="score"):
    """Parse a PSSM file from BLAST into a usable datastructure

    After construction:
      self.native_sequence maps residue number -> residue id
      self.pssmmap maps residue number -> {header name: int value}
    Residue numbers are the integers in the first column of each data
    line; residue ids are the second column.

    path -- file to read (opened through fileutil.universal_open)
    mode -- "score" selects the 20 columns right after the residue id,
            "percent" selects the 20 columns that follow them

    Raises ValueError when mode is not one of the two values above.
    """
    # (start, stop) field indices of a split data line for each mode
    column_ranges = {"score": (2, 22), "percent": (22, 42)}

    self.pssmmap = {}
    self.native_sequence = {}
    if mode not in column_ranges:
        # an unknown mode used to leave the data slice undefined
        raise ValueError("mode must be 'score' or 'percent', got %r" % (mode,))
    start, stop = column_ranges[mode]

    pssmfile = fileutil.universal_open(path, 'r')
    try:
        # skip the two lines that come before the column header
        pssmfile.readline()
        pssmfile.readline()
        # Use 20 names, not 21: in the standard PSI-BLAST ASCII layout the
        # 21st header entry is the first residue name repeated for the
        # percentage block, and pairing it with the 21st data field
        # replaced that residue's score with its percentage.
        header = pssmfile.readline().split()[0:20]
        for line in pssmfile:
            fields = line.split()
            if len(fields) == 0:
                break  # the matrix ends at the first blank line
            res_num = int(fields[0])
            self.native_sequence[res_num] = fields[1]
            line_map = {}
            for resname, score in zip(header, fields[start:stop]):
                line_map[resname] = int(score)
            self.pssmmap[res_num] = line_map
    finally:
        # make sure the handle is released if parsing fails
        pssmfile.close()
def load_pdb(path):
    """return a biopython structure object given a pdb file path

    path is opened with fileutil.universal_open; its first four
    characters become the id passed to PDBParser.get_structure.
    """
    parser = PDBParser(PERMISSIVE=1)
    pdb_file = fileutil.universal_open(path,'rU')
    try:
        structure = parser.get_structure(path[0:4],pdb_file)
    finally:
        # release the handle even if parsing fails
        pdb_file.close()
    return structure
def make_table(name_score_rmsd, out_name):
    """Write a tab-separated table with the header file/score/RMSD.

    name_score_rmsd -- iterable of indexable records with at least three
                       elements each
    out_name        -- output path, opened through fileutil.universal_open

    One row is written per record: element 0, then element 2, then
    element 1, each converted with str().
    """
    table = fileutil.universal_open(out_name, 'w')
    try:
        table.write("file\tscore\tRMSD\n")
        for point in name_score_rmsd:
            # NOTE(review): element 2 lands in the "score" column and
            # element 1 in the "RMSD" column, the reverse of what the
            # parameter name suggests -- confirm against the callers
            # before changing the order.
            row = [str(point[0]), str(point[2]), str(point[1])]
            table.write("\t".join(row) + "\n")
    finally:
        # close (and flush) the file even if a record is malformed
        table.close()
def read_bcl_file(self,path):
    """Placeholder for a BCL file reader.

    Prints a notice and terminates the program through sys.exit(); none
    of the statements after that call are ever executed.
    NOTE(review): the definition may continue past what is visible here.
    """
    print "This function doesn't work yet"
    sys.exit()
    # --- unreachable from here on: set-up for a parser that is not implemented ---
    out_file = fileutil.universal_open(path,'r')
    # header strings, the same text that write_bcl_file emits
    list_header ="bcl::storage::List<bcl::storage::VectorND2<bcl::math::Vector<double>>>"
    vector_header = "bcl::storage::VectorND2<bcl::math::Vector<double>>"
    double_header = "bcl::math::Vector<double>"
    # presumably flags tracking which section is being read -- TODO confirm
    list_scope = False
    vector_scope = False
    double_scope = False
def write_file(self,filename):
    """Write the reference energies and weights of this object to filename.

    The first line holds the tab-separated fields METHOD_WEIGHTS, ref and
    then self.ref_energies[key] for every key of aa_codes_in_order, in
    that order.  Each following line holds one entry of self.weights as
    name, tab, value.

    Raises KeyError if a key of aa_codes_in_order has no reference energy.
    """
    out_file = fileutil.universal_open(filename,'w')
    try:
        #write reference energies
        ref_fields = ["METHOD_WEIGHTS","ref"]
        ref_fields.extend([str(self.ref_energies[key]) for key in aa_codes_in_order])
        out_file.write("\t".join(ref_fields)+"\n")
        #write the other weights
        for key in self.weights:
            out_file.write(key+"\t"+str(self.weights[key])+"\n")
    finally:
        # close the file even if a value is missing
        out_file.close()
def write_file(self, filename):
    """Write self.ref_energies and self.weights to filename.

    Line 1: METHOD_WEIGHTS, ref, then the reference energy of each key in
    aa_codes_in_order, all tab-separated.
    Remaining lines: one "name<tab>value" line per entry of self.weights.

    Raises KeyError if self.ref_energies lacks a key of aa_codes_in_order.
    """
    out_file = fileutil.universal_open(filename, 'w')
    try:
        #write reference energies
        out_file.write("METHOD_WEIGHTS\tref")
        for key in aa_codes_in_order:
            out_file.write("\t" + str(self.ref_energies[key]))
        out_file.write("\n")
        #write the other weights
        for key in self.weights:
            out_file.write(key + "\t" + str(self.weights[key]) + "\n")
    finally:
        # the handle is released even when a lookup above fails
        out_file.close()
def read_file(self, filename):
    """Load reference energies and weights from filename.

    Resets self.ref_energies and self.weights, then reads the file line
    by line:
      - a line whose first field is METHOD_WEIGHTS supplies the reference
        energies: the fields from the third one on are paired, in order,
        with aa_codes_in_order and stored as floats
      - any other non-blank line is stored as weights[field 0] = float(field 1)

    Raises ValueError if a value is not a valid float.
    """
    self.ref_energies = {}
    self.weights = {}
    in_file = fileutil.universal_open(filename, 'rU')
    try:
        for line in in_file:
            fields = line.split()
            if not fields:
                continue  # blank lines used to crash on fields[0]
            if fields[0] == "METHOD_WEIGHTS":
                #Reference energies are ordered by 1 letter name
                for aa, value in zip(aa_codes_in_order, fields[2:]):
                    self.ref_energies[aa] = float(value)
            else:
                self.weights[fields[0]] = float(fields[1])
    finally:
        # close the file even when a line fails to parse
        in_file.close()
def read_file(self,filename):
    """Read a weights file into self.ref_energies and self.weights.

    Both dictionaries are emptied first.  A line starting with the field
    METHOD_WEIGHTS provides the reference energies (third field onward,
    matched in order against aa_codes_in_order); every other non-blank
    line provides one weight as "name value".  All values are stored as
    floats.

    Raises ValueError if a value cannot be converted to float.
    """
    self.ref_energies = {}
    self.weights = {}
    in_file = fileutil.universal_open(filename,'rU')
    try:
        for line in in_file:
            fields = line.split()
            if len(fields) == 0:
                # skip blank lines instead of failing on fields[0]
                continue
            if fields[0] == "METHOD_WEIGHTS":
                #Reference energies are ordered by 1 letter name
                for aa, value in zip(aa_codes_in_order,fields[2:]):
                    self.ref_energies[aa] = float(value)
            else:
                self.weights[fields[0]] = float(fields[1])
    finally:
        # release the handle even on a parse error
        in_file.close()
def __init__(self, path):
    """Read the pose energies table from the file at path.

    Lines are ignored until one starts with #BEGIN_POSE_ENERGIES_TABLE.
    Inside the table:
      label line   -- its remaining fields become the score term names
      weights line -- stored in self.weights per term as floats; "NA"
                      is stored as 1.0
      other lines  -- one ScoreRecord each, stored in self.records under
                      the integer after the last "_" of the first field,
                      or under 0 when the first field is "pose"
    After #END_POSE_ENERGIES_TABLE, each line with at least two fields is
    added to the scores of record 0 as term -> value, as a float when the
    value parses as one and as the raw string otherwise.

    Raises KeyError if such trailing data appears but record 0 was never read.
    """
    table = False
    appended_data = False
    header = []
    self.weights = {}
    self.records = {}
    infile = fileutil.universal_open(path, 'r')
    try:
        for line in infile:
            fields = line.split()
            # Test the split result: the raw line always contains its
            # newline, so the old len(line) check never skipped blank
            # lines and they crashed on fields[0].
            if not fields:
                continue
            tag = fields[0]
            if tag == "#BEGIN_POSE_ENERGIES_TABLE":
                table = True
            elif tag == "#END_POSE_ENERGIES_TABLE":
                appended_data = True
            elif not table:
                continue  # nothing before the table is of interest
            elif tag == "label":
                header = fields[1:]
            elif tag == "weights":
                for term, weight in zip(header, fields[1:]):
                    if weight != "NA":
                        self.weights[term] = float(weight)
                    else:
                        self.weights[term] = 1.0
            elif not appended_data:
                if tag != "pose":
                    resid = int(tag.split("_").pop())
                else:
                    resid = 0
                scoredict = {}
                for term, score in zip(header, fields[1:]):
                    scoredict[term] = float(score)
                self.records[resid] = ScoreRecord(tag, resid, scoredict)
            else:
                if len(fields) < 2:
                    continue
                #all these apply to the whole pose, so we'll extract the scoredict for the pose
                #this *must* come after the pose table, so we can get the whole set
                term = fields[0]
                value = fields[1]
                try:
                    self.records[0].scores[term] = float(value)
                except ValueError:
                    self.records[0].scores[term] = value
    finally:
        # close the file even when a line fails to parse
        infile.close()
def __init__(self,path):
    """Parse the pose energies table found in the file at path.

    Nothing is read until a line starting with #BEGIN_POSE_ENERGIES_TABLE
    is seen.  From then on:
      - the "label" line defines the score term names
      - the "weights" line fills self.weights (float per term, 1.0 where
        the file says "NA")
      - every other line becomes a ScoreRecord in self.records, keyed by
        the integer after the last "_" of its first field, or by 0 when
        the first field is "pose"
    Lines after #END_POSE_ENERGIES_TABLE that have at least two fields
    are stored on record 0 as scores[term] = value, converted to float
    when possible.

    Raises KeyError if such trailing lines exist without a record 0.
    """
    table = False
    appended_data = False
    header=[]
    self.weights = {}
    self.records = {}
    infile = fileutil.universal_open(path,'r')
    try:
        for line in infile:
            fields = line.split()
            if len(fields) == 0:
                # The old check looked at the raw line, which is never
                # empty, so blank lines fell through and raised
                # IndexError on fields[0].
                continue
            first = fields[0]
            rest = fields[1:]
            if first == "#BEGIN_POSE_ENERGIES_TABLE":
                table = True
            elif first == "#END_POSE_ENERGIES_TABLE":
                appended_data = True
            elif table and first == "label":
                header = rest
            elif table and first == "weights":
                for term, weight in zip(header,rest):
                    if weight == "NA":
                        self.weights[term] = 1.0
                    else:
                        self.weights[term] = float(weight)
            elif table and not appended_data:
                if first == "pose":
                    resid = 0
                else:
                    resid = int(first.split("_").pop())
                scoredict = {}
                for term, score in zip(header,rest):
                    scoredict[term] = float(score)
                self.records[resid] = ScoreRecord(first,resid,scoredict)
            elif table and appended_data:
                if len(fields) < 2:
                    continue
                #all these apply to the whole pose, so we'll extract the scoredict for the pose
                #this *must* come after the pose table, so we can get the whole set
                value = fields[1]
                try:
                    self.records[0].scores[first] = float(value)
                except ValueError:
                    self.records[0].scores[first] = value
    finally:
        # release the handle even on a parse error
        infile.close()
def read(self,filename,append=False):
    """read a rosetta 3 loop file into the loop manager.

    if append=True, add the contents of the loop file to the existing
    loops; otherwise self.looplist is emptied first.

    Only lines whose first whitespace-separated field is LOOP are used;
    each one is handed unmodified to RosettaLoop.set_loop_from_string.
    Comments, blank lines and anything else are skipped.
    """
    if not append:
        self.looplist = []
    loop_file = fileutil.universal_open(filename,"rU")
    try:
        for line in loop_file:
            fields = line.split()
            # A line beginning with '#' can never have LOOP as its first
            # field, so this single test also covers comment lines.
            if not fields or fields[0] != "LOOP":
                continue
            loop = RosettaLoop()
            loop.set_loop_from_string(line)
            self.looplist.append(loop)
    finally:
        # close the file even when a loop line fails to parse
        loop_file.close()
def read(self, filename, append=False):
    """read a rosetta 3 loop file into the loop manager.

    if append=True, add the contents of the loop file to the existing
    loops; if append=False, start from an empty self.looplist.

    A RosettaLoop is created for each line whose first field is LOOP and
    is filled through set_loop_from_string with the raw line.  All other
    lines are ignored.
    """
    if not append:
        self.looplist = []
    loop_file = fileutil.universal_open(filename, "rU")
    try:
        for line in loop_file:
            fields = line.split()
            if len(fields) < 1:
                continue  #this is a blank line
            if fields[0] != "LOOP":
                #a comment (first character '#') or some other non-loop
                #line; a line starting with '#' never has LOOP as its
                #first field, so no separate comment test is needed
                continue
            loop = RosettaLoop()
            loop.set_loop_from_string(line)
            self.looplist.append(loop)
    finally:
        # release the handle even if a loop line is rejected
        loop_file.close()
def get_table(path):
    """return the score table from the bottom of a PDB as a list of lines

    Reading stops at the first blank line of the file.  Collection starts
    with the line whose first field is #BEGIN_POSE_ENERGIES_TABLE and
    includes every line read after it (the end marker and any trailing
    data too).  Lines are returned unmodified, newline included.  The
    list is empty if the marker was not reached.
    """
    raw_table = []
    infile = fileutil.universal_open(path,'r')
    try:
        in_table = False
        for line in infile:
            line_split = line.split()
            if not line_split:
                break # a blank line ends the scan
            if line_split[0] == "#BEGIN_POSE_ENERGIES_TABLE":
                in_table = True
            if in_table:
                raw_table.append(line)
    finally:
        # close the file even if reading fails part-way
        infile.close()
    return raw_table
def get_table(path):
    """return the score table from the bottom of a PDB as a list of lines

    The file is scanned until its first blank line.  Every line from the
    #BEGIN_POSE_ENERGIES_TABLE marker onward is kept exactly as read,
    including the end marker and whatever follows it.  If the marker is
    never reached the result is an empty list.
    """
    raw_table = []
    table = False
    infile = fileutil.universal_open(path, 'r')
    try:
        for line in infile:
            line_split = line.split()
            if len(line_split) < 1:
                break  # stop at the first blank line
            if not table and line_split[0] == "#BEGIN_POSE_ENERGIES_TABLE":
                table = True
            if table:
                raw_table.append(line)
    finally:
        # release the handle even if reading fails
        infile.close()
    return raw_table
def write_bcl_file(self,path):
    """Write self.records to path as a BCL list of 2D vectors.

    Output layout, one item per line:
      - the list header followed by the number of records
      - for every (first, second) pair in self.records: the VectorND2
        header, then for each of the two values a Vector<double> header,
        the length 1 and str() of the value
    """
    list_header ="bcl::storage::List<bcl::storage::VectorND2<bcl::math::Vector<double>>>"
    vector_header = "bcl::storage::VectorND2<bcl::math::Vector<double>>"
    double_header = "bcl::math::Vector<double>"

    out_file = fileutil.universal_open(path,'w')
    try:
        out_file.write(list_header+"\n")
        out_file.write(str(len(self.records))+"\n")
        for first_col, second_col in self.records:
            out_file.write(vector_header+"\n")
            # each column is emitted as its own one-element vector
            for value in (first_col,second_col):
                out_file.write(double_header+"\n")
                out_file.write("1\n")
                out_file.write(str(value)+"\n")
    finally:
        # close (and flush) the file even if a record is malformed
        out_file.close()
def add_file(self, path, ignore_ref=True):
    """Add the score records found in the file at path to self.records.

    Two line types are recognised by their first field:
      SCORES -- atom tree diff file.  Field 1 is the tag and the
                remaining fields are passed to score_pairs(); every pair
                it yields is added to the record.  Tags starting with
                %REF% are skipped when ignore_ref is true.  A line that
                raises ValueError is reported and skipped.
      SCORE: -- normal silent file.  The first such line whose second
                field is "score" is the header naming the score terms.
                Every other line is a score line: its last field is the
                tag and the fields in between are matched with the
                header, stored as floats where they parse as floats and
                as strings otherwise.
    Each record is a PoseScoreRecord stored under its tag, replacing any
    record already stored under that tag.  Other lines are ignored.
    """
    infile = fileutil.universal_open(path, 'r')
    try:
        header = []
        for line in infile:
            fields = line.split()
            # Test the split result: the raw line is never empty, so the
            # old len(line) check let blank lines through to fields[0].
            if not fields:
                continue
            if fields[0] == "SCORES":  #this is an atom tree diff file
                tag = fields[1]
                if ignore_ref and tag[0:5] == "%REF%":
                    continue
                record = PoseScoreRecord(tag)
                record.set_file(path)
                try:
                    for pair in score_pairs(fields[2:]):
                        record.add_score(*pair)
                    self.records[tag] = record
                except ValueError:
                    # single-argument form prints the same text under
                    # both the print statement and the print function
                    print("theres some problem with this score line, possible corruption, skipping line")
                    continue
            elif fields[0] == "SCORE:":  #this is a normal silent file
                if fields[1] == "score" and len(header) == 0:
                    #this is the header, stick the scoreterms in it
                    header = fields[1:]
                else:  #this is a score line
                    tag = fields[-1]  #the last item is the tag
                    record = PoseScoreRecord(tag)
                    record.set_file(path)
                    for term, score in zip(header, fields[1:-1]):
                        try:
                            record.add_score(term, float(score))
                        except ValueError:
                            record.add_score(term, score)
                    self.records[tag] = record
    finally:
        # close the file even when a line fails to parse
        infile.close()
def add_file(self,path,ignore_ref=True):
    """Read score lines from the file at path into self.records.

    SCORES lines (atom tree diff files): field 1 is the tag, the fields
    after it go through score_pairs() and each resulting pair is added
    to the record.  With ignore_ref true, tags beginning with %REF% are
    skipped.  If ValueError is raised while handling the line a message
    is printed and the line is dropped.

    SCORE: lines (normal silent files): the first one whose second field
    is "score" supplies the term names.  On every other one the last
    field is the tag and the fields between the marker and the tag are
    paired with the term names; values are stored as floats when they
    convert, otherwise as the original strings.

    Records are PoseScoreRecord objects keyed by tag; a later record with
    the same tag overwrites the earlier one.  Any other line is ignored.
    """
    infile = fileutil.universal_open(path,'r')
    header=[]
    try:
        for line in infile:
            fields = line.split()
            if len(fields) == 0:
                # blank line; checking the raw line (never empty) used to
                # let these through and fail on fields[0]
                continue
            kind = fields[0]
            if kind == "SCORES": #this is an atom tree diff file
                tag = fields[1]
                if ignore_ref and tag[0:5] =="%REF%":
                    continue
                record = PoseScoreRecord(tag)
                record.set_file(path)
                scorefields = fields[2:]
                try:
                    for pair in score_pairs(scorefields):
                        record.add_score(*pair)
                    self.records[tag] = record
                except ValueError:
                    # written so it prints identically as a statement or a function call
                    print("theres some problem with this score line, possible corruption, skipping line")
                    continue
            elif kind == "SCORE:": #this is a normal silent file
                if fields[1] == "score" and len(header) == 0: #this is the header
                    header = fields[1:] #stick the scoreterms in the header
                else: #this is a score line
                    tag = fields[-1] #the last item is the tag
                    record = PoseScoreRecord(tag)
                    record.set_file(path)
                    for term,score in zip(header,fields[1:-1]):
                        try:
                            record.add_score(term,float(score))
                        except ValueError:
                            record.add_score(term,score)
                    self.records[tag] = record
    finally:
        # release the handle even on a parse error
        infile.close()
def __init__(self, filename):
    """load a param file into a data structure

    The file is read line by line; each non-blank line is split on
    whitespace and its first field selects what is stored:
      NAME / IO_STRING / TYPE / AA / NBR_ATOM / NBR_RADIUS
          -- second field kept as a string in self.name, self.io_string,
             self.type, self.aa, self.nbr_atom, self.nbr_radius
      ATOM           -- Atom(fields 1-4) stored in self.atoms by name
      BOND           -- Bond(fields 1-2) appended to self.bonds and
                        registered on both atoms through add_bond
      ICOOR_INTERNAL -- icoor(fields 1-7) stored in self.icoors by field 1
    Unrecognised lines are skipped.

    Raises KeyError if a BOND line names an atom with no earlier ATOM line.
    """
    self.atoms = {}
    self.bonds = []
    self.icoors = {}
    infile = fileutil.universal_open(filename, 'r')
    try:
        for line in infile:
            fields = line.split()
            if len(fields) == 0:
                # Blank line.  The previous check was made on the raw
                # line, which still holds its newline, so it never fired
                # and blank lines raised IndexError below.
                continue
            tag = fields[0]
            if tag == "NAME":
                self.name = fields[1]
            elif tag == "IO_STRING":
                self.io_string = fields[1]
            elif tag == "TYPE":
                self.type = fields[1]
            elif tag == "AA":
                self.aa = fields[1]
            elif tag == "ATOM":
                atom_name = fields[1]
                self.atoms[atom_name] = Atom(atom_name, fields[2], fields[3], fields[4])
            elif tag == "BOND":
                current_bond = Bond(fields[1], fields[2])
                self.bonds.append(current_bond)
                for atom_name in (fields[1], fields[2]):
                    self.atoms[atom_name].add_bond(current_bond)
            elif tag == "NBR_ATOM":
                self.nbr_atom = fields[1]
            elif tag == "NBR_RADIUS":
                self.nbr_radius = fields[1]
            elif tag == "ICOOR_INTERNAL":
                self.icoors[fields[1]] = icoor(fields[1], fields[2], fields[3],
                                               fields[4], fields[5], fields[6],
                                               fields[7])
    finally:
        # release the handle even on a parse error
        infile.close()
scores.add_file(args[0]) structure_count = len(scores) percent = float(options.percent)/100.0 structs_to_print =int(percent*structure_count) sorted_scores = scores.sorted_score_generator(options.term) count = 0 while count < structs_to_print: (tag,score) = sorted_scores.next() print tag,score count += 1 elif options.mode =="pdb" or options.mode=="PDB": file_scores = [] pathfile = fileutil.universal_open(args[0],"r") for path in pathfile: path = path.rstrip() scores = rosettaScore.ScoreTable(path) total_score = scores.get_score(0,options.term) file_scores.append( (path,total_score) ) file_scores = sorted(file_scores,key=lambda x: x[1]) structure_count = len(file_scores) percent = float(options.percent)/100.0 structs_to_print = int(percent*structure_count) count = 0 for i in range(structs_to_print): (tag,score) = file_scores[i] print tag,score else:
data = [] #list of tuples in form (tag,x_score,y_score) if options.silent != "": scores = rosettaScore.SilentScoreTable() scores.add_file(options.silent) x_axis_scores = scores.score_generator(options.x_axis) y_axis_scores = scores.score_generator(options.y_axis) for x_point, y_point in zip(x_axis_scores, y_axis_scores): x_tag = x_point[0] y_tag = y_point[0] if x_tag != y_tag: sys.exit("tags aren't equal, something is very wrong") data.append((x_tag, x_point[1], y_point[1])) elif options.silent_list != "": silent_list = fileutil.universal_open(options.silent_list, "rU") for path in silent_list: scores = rosettaScore.SilentScoreTable() scores.add_file(path.rstrip()) x_axis_scores = scores.score_generator(options.x_axis) y_axis_scores = scores.score_generator(options.y_axis) for x_point, y_point in zip(x_axis_scores, y_axis_scores): x_tag = x_point[0] y_tag = y_point[0] if x_tag != y_tag: sys.exit("tags aren't equal, something is very wrong") data.append((x_tag, x_point[1], y_point[1])) if options.pdb_list != "": pdb_list = fileutil.universal_open(options.pdb_list, "rU") for pdb in pdb_list:
def make_table(name_score_rmsd,out_name):
    """Write name_score_rmsd to out_name as a tab-separated table.

    The header line is file, score, RMSD.  For each record the row holds
    str() of element 0, element 2 and element 1, in that order.
    """
    table = fileutil.universal_open(out_name,'w')
    try:
        table.write("file\tscore\tRMSD\n")
        for point in name_score_rmsd:
            # NOTE(review): the "score" column receives element 2 and the
            # "RMSD" column element 1, which looks swapped relative to
            # the parameter name -- verify with the callers; order is
            # kept as it was.
            table.write(str(point[0])+"\t"+str(point[2])+"\t"+str(point[1])+"\n")
    finally:
        # release the handle even if a record is malformed
        table.close()
residue_id = int(options.start) except ValueError: sys.exit("residue number specified with -n must be an integer") chain_id = "" for residue in struct.get_residues(): chain = residue.get_parent() if(chain_id != chain.get_id() and not options.norestart): chain_id = chain.get_id() residue_id=int(options.start) #print chain.get_id() if(options.preserve): hetero = residue.id[0] insert = residue.id[2] residue.id=(hetero,residue_id,insert) else: residue.id=(' ',residue_id,' ') residue_id +=1 io=PDBIO() io.set_structure(struct) outfile = fileutil.universal_open(args[1],'w') io.save(outfile) if(options.table): raw_table = rosettaScore.get_table(args[0]) #outfile = fileutil.universal_open(args[1],'a') outfile.writelines(raw_table) outfile.close()
def write(self, filename):
    """write the contents of the loop manager to a Rosetta3 loop file

    One line is written per entry of self.looplist, using the text
    returned by its to_string() method.
    """
    loop_file = fileutil.universal_open(filename, "w")
    try:
        for loop in self.looplist:
            loop_file.write(loop.to_string() + "\n")
    finally:
        # close (and flush) the file even if to_string() fails
        loop_file.close()
def write(self,filename):
    """write the contents of the loop manager to a Rosetta3 loop file

    Each loop in self.looplist contributes one line: loop.to_string()
    followed by a newline.
    """
    loop_file = fileutil.universal_open(filename,"w")
    try:
        loop_file.writelines([loop.to_string()+"\n" for loop in self.looplist])
    finally:
        # release the handle even if a loop cannot be serialised
        loop_file.close()
for x in list: yield x usage = "%prog [options] alignment_file.aln template.pdb output.pdb" parser=OptionParser(usage) parser.add_option("--template",dest="template",help="name of the template sequence", default="template") parser.add_option("--target", dest ="target",help="name of the target sequence",default="target") parser.add_option("--chain",dest="chain",help="chain to thread pdb around",default="A") parser.add_option("--align_format",dest="align_format",help="alignment file format, choose from clustal, emboss, fasta, fasta-m10,ig,nexus,phylip,stockholm. See http://biopython.org/wiki/AlignIO for details",default="clustal") (options,args)= parser.parse_args() if len(args) != 3: parser.error("you must specify an alignment file, template pdb, and output pdb") #read in our input files alignment_file = fileutil.universal_open(args[0],'rU') alignment_data = AlignIO.read(alignment_file,options.align_format) alignment_file.close() template_struct = util.load_pdb(args[1]) #if len(alignment_data) != 2: # sys.exit("alignment file must have exactly 2 sequences!") #find all the gaps, get numeric IDs from the string tags in the alignment file try: template_gaps = alignment.find_gaps(alignment_data,options.template) except LookupError: sys.exit("could not find "+options.template+" in alignment file") try: target_gaps = alignment.find_gaps(alignment_data,options.target) except LookupError:
#!/usr/bin/env python2.5
# Reads a loop file and a PDB; every atom whose residue number is reported
# as being in a loop gets its coordinates set to (0, 0, 0) and its
# occupancy set to -1.0.  The modified structure is written to the output
# path.
import array
import Bio.PDB
from optparse import OptionParser
from rosettautil.protein import util
from rosettautil.rosetta import loops
from rosettautil.util import fileutil

usage = "%prog [options] loopfile.txt input.pdb output.pdb"
parser = OptionParser(usage)
(options, args) = parser.parse_args()
if len(args) < 3:
    # fail with the usage text instead of an IndexError further down
    parser.error("you must specify a loop file, an input pdb, and an output pdb")

loop_manager = loops.RosettaLoopManager()
loop_manager.read(args[0])
input_struct = util.load_pdb(args[1])

zero_triplet = array.array('f', [0.0, 0.0, 0.0])
for atom in input_struct.get_atoms():
    # the residue number is element 1 of the parent residue's id
    resnum = atom.get_parent().get_id()[1]
    if loop_manager.is_res_in_loop(resnum):
        atom.set_coord(zero_triplet)
        atom.set_occupancy(-1.0)

pdb_io = Bio.PDB.PDBIO()
pdb_io.set_structure(input_struct)
outfile = fileutil.universal_open(args[2], 'w')
try:
    pdb_io.save(outfile)
finally:
    # close (and flush) the output even if saving fails
    outfile.close()
data = [] #list of tuples in form (tag,x_score,y_score) if options.silent != "": scores = rosettaScore.SilentScoreTable() scores.add_file(options.silent) x_axis_scores = scores.score_generator(options.x_axis) y_axis_scores = scores.score_generator(options.y_axis) for x_point, y_point in zip(x_axis_scores,y_axis_scores): x_tag= x_point[0] y_tag = y_point[0] if x_tag != y_tag: sys.exit("tags aren't equal, something is very wrong") data.append( (x_tag,x_point[1],y_point[1]) ) elif options.silent_list != "": silent_list = fileutil.universal_open(options.silent_list,"rU") for path in silent_list: scores = rosettaScore.SilentScoreTable() scores.add_file(path.rstrip()) x_axis_scores = scores.score_generator(options.x_axis) y_axis_scores = scores.score_generator(options.y_axis) for x_point, y_point in zip(x_axis_scores,y_axis_scores): x_tag= x_point[0] y_tag = y_point[0] if x_tag != y_tag: sys.exit("tags aren't equal, something is very wrong") data.append( (x_tag,x_point[1],y_point[1]) ) if options.pdb_list != "": pdb_list = fileutil.universal_open(options.pdb_list,"rU") for pdb in pdb_list:
def __init__(self,path):
    """Create an empty sasa_map in self.sasamap and parse the file at path.

    The file is opened through fileutil.universal_open and handed to
    self.__parse__, which does the actual reading.
    """
    self.sasamap = sasa_map()
    sasafile = fileutil.universal_open(path,'r')
    try:
        self.__parse__(sasafile)
    finally:
        # close the file even when parsing raises
        sasafile.close()
current_item = ClusterItem(tag,cluster_id,struct_index) if options.silent != "": current_item.set_score(pose_scores.get_score(tag,"score")) elif options.pdbs != "": current_item.set_score(pose_scores.get_score(tag,0,"total")) try: clusters[cluster_id].append(current_item) except KeyError: clusters[cluster_id] = [current_item] print "Clusters:",num_clusters print "Structures:",num_structures #output the cluster summary to a file output_file = fileutil.universal_open(args[0],'w') output_file.write("tag\tfile_name\tscore\tsize\n") for key in clusters: cluster_list = clusters[key] cluster_list = sorted(cluster_list,key=lambda item: item.score) output_file.write(cluster_list[0].tag+"\tc."+str(key)+"."+str(cluster_list[0].struct_index)+".pdb\t"+str(cluster_list[0].score)+"\t"+str(len(cluster_list))+"\n") output_file.close() #output the histogram to a file bin_line = "bin" count_line = "count" for bin,count in histogram: bin_line +="\t"+bin count_line +="\t"+count bin_line += "\n"
#!/usr/bin/env python2.5
# Converts a whitespace-separated two-column text file into a BCL file:
# the first two fields of every non-blank input line are added as one
# record and the collection is written with write_bcl_file.
from optparse import OptionParser
import sys
from rosettautil.util import fileutil
from rosettautil.bcl import file_formats

usage = "%prog [options] tabbed_input.txt bcl_output.txt"
parser = OptionParser(usage)
(options, args) = parser.parse_args()
if len(args) < 2:
    # fail with the usage text instead of an IndexError further down
    parser.error("you must specify an input file and an output file")

in_file = fileutil.universal_open(args[0], "r")
vector_data = file_formats.list_of_2D_vectors()
try:
    for line in in_file:
        fields = line.split()
        if len(fields) == 0:
            continue
        if len(fields) < 2:
            # name the offending line rather than die with IndexError
            sys.exit("input line has fewer than two columns: " + line.rstrip())
        vector_data.add_record(fields[0], fields[1])
finally:
    # release the input handle even if a line is rejected
    in_file.close()
vector_data.write_bcl_file(args[1])
current_item = ClusterItem(tag, cluster_id, struct_index) if options.silent != "": current_item.set_score(pose_scores.get_score(tag, "score")) elif options.pdbs != "": current_item.set_score(pose_scores.get_score(tag, 0, "total")) try: clusters[cluster_id].append(current_item) except KeyError: clusters[cluster_id] = [current_item] print "Clusters:", num_clusters print "Structures:", num_structures #output the cluster summary to a file output_file = fileutil.universal_open(args[0], 'w') output_file.write("tag\tfile_name\tscore\tsize\n") for key in clusters: cluster_list = clusters[key] cluster_list = sorted(cluster_list, key=lambda item: item.score) output_file.write(cluster_list[0].tag + "\tc." + str(key) + "." + str(cluster_list[0].struct_index) + ".pdb\t" + str(cluster_list[0].score) + "\t" + str(len(cluster_list)) + "\n") output_file.close() #output the histogram to a file bin_line = "bin" count_line = "count" for bin, count in histogram: