def best_of_experiments(acids_sequence, n, m, folding=None):
    """Run ``n`` folding experiments and return the best-scoring protein.

    Each experiment builds a fresh ``Protein`` from ``acids_sequence``, gives
    it the requested start folding, calls ``Algorithms.fold_n_times(m, ...)``
    to improve it, and prints the start, end and best-so-far scores.

    Args:
        acids_sequence: Sequence passed unchanged to ``Protein(...)``.
        n: Number of experiments to run.
        m: Number of fold attempts per experiment.
        folding: ``'cube_folding'``, ``'random_folding'`` or anything else
            (no start folding is applied).

    Returns:
        The protein object of the last experiment with its ``acids`` replaced
        by copies of the acids of the best experiment (lower score is
        better), or ``None`` when no experiment produced a result (``n`` is
        0 or every start folding failed).
    """
    print('Start folding:', folding)
    best_score = 1
    # Both stay None until an experiment actually completes; previously they
    # were unbound in that case and the final assignment raised NameError.
    best_result = None
    protein = None

    for _ in range(n):
        protein = Protein(acids_sequence)

        if folding == 'cube_folding':
            if not Algorithms.cube_folding(protein, shift='', d3=True):
                print("failed to get a cube folding as start")
                continue
        elif folding == 'random_folding':
            if not Algorithms.random_folding(protein):
                print("failed to get a random folding as start")
                continue

        start_score = Algorithms.score(protein)
        # A failed improvement run is reported but the (possibly partially
        # improved) protein is still scored below.
        if not Algorithms.fold_n_times(m, protein):
            print("failed to fold n times")
        end_score = Algorithms.score(protein)

        if end_score < best_score:
            best_score = end_score
            best_result = [acid.copy() for acid in protein.acids]

        print(
            'Start with score:\t{}\t\tEnd with score:\t{}\t\tBest score:\t{}'.
            format(start_score, end_score, best_score))

    if best_result is None:
        return None

    protein.acids = best_result
    return protein
def main():
    """Try every fold option for the protein given on the command line.

    Each option from ``Option(protein.length).options`` is laid out on a
    ``Field``; the option with the highest ``fold_points`` value is
    remembered. Finally the best score, the best option and the field filled
    with that option are printed.
    """
    # checks whether program is used correctly
    check()

    # makes user input into the protein class
    protein = Protein(argv[1])
    options = Option(protein.length)
    field = Field(protein.length, protein.sequence)

    best_fold_points = 0
    best_fold = options.options[0]

    for option in options.options:
        # fill_field reports whether the option could be laid out at all
        if field.fill_field(protein.sequence, option):
            # Score once and reuse the value (it used to be computed twice).
            points = fold_points(field, protein.errorpoint)
            if int(points) > int(best_fold_points):
                best_fold_points = points
                best_fold = option

        # Reset the field and the start coordinates before the next option.
        field.clear_field(protein.length)
        field.x_cdn = protein.length - 1
        field.y_cdn = protein.length

    # prints best_fold_points and best_fold and the field of the best fold
    print(best_fold_points)
    print(best_fold)
    field.fill_field(protein.sequence, best_fold)
    for line in field.field:
        print(line)
def branch_n_bound(p_string, prob_above_avg, prob_below_avg, dimension, matrix_size):
    '''
    Fold a protein using a probability based version of the Branch and Bound
    algorithm. If the probabilities for pruning are set to 1 and 1, this
    algorithm behaves as a depth-first algorithm and searches the whole
    statespace for the best solution.

    The search itself is done by next_acid(), which communicates through the
    module-level globals initialised here.

    Returns the tuple (protein_min, energy_counter, matrix_sizes); exits the
    program with an error message when the search stored no protein.
    '''
    # Set global variables
    global protein_string, prob_below_average, prob_above_average, length_total, energy_min_all, energy_min_partial
    prob_below_average = prob_below_avg
    prob_above_average = prob_above_avg
    protein_string = p_string
    length_total = len(protein_string)

    # Initialize global dictionaries
    global energy_counter, matrix_sizes, energy_tracker
    energy_tracker = [{} for _ in range(length_total)]
    energy_counter = {}
    matrix_sizes = {}

    # Reset the result slot so a protein left over from an earlier call can
    # never be returned, and so the check below cannot hit an undefined name.
    global protein_min
    protein_min = None

    # Create a protein object with a specific matrix size
    protein = Protein(matrix_size, dimension)

    # Initialize energy variable that keeps the lowest energy for a complete protein
    energy_min_all = 1

    # Initialize energy variable that keeps the lowest energy for a protein of each length
    energy_min_partial = [0] * length_total

    # Place first two amino acids
    protein.place_first_two(protein_string)
    previous_location = protein.last_acid

    # Call next_acid function to place a new amino acid
    next_acid(protein, previous_location)

    print(energy_tracker)
    print(energy_counter)
    print(sum(energy_counter.values()))

    if protein_min:
        return protein_min, energy_counter, matrix_sizes
    else:
        exit("Error: No protein 'protein_min' to return")
def beamsearch(p_string, width, dimension, matrix_size):
    '''
    Runs a Beam Search algorithm with a predetermined width which determines
    how many proteins are kept at each new generation. The best conformation
    (taken to be the first entry of the proteins dictionary after the search)
    is returned along with a dictionary of all energy counts and the minimal
    matrix sizes for the folded protein.

    The search is done by find_possibilities(), which works on the
    module-level globals initialised here.
    '''
    # Set global variables
    global best_nodes, protein_length, protein_string, energy_counter, proteins, B_width, matrix_sizes, initial_protein
    protein_string = p_string
    protein_length = len(protein_string)
    B_width = width

    # Create a protein object with a specific matrix size
    initial_protein = Protein(matrix_size, dimension)

    # Place the first two amino acids
    initial_protein.place_first_two(protein_string)
    previous_location = [initial_protein.last_acid]

    # Initialize dictionaries
    energy_counter = {}
    matrix_sizes = {}

    # Initialize the proteins dictionary that keeps track of the protein
    # objects; every slot starts out referring to the same initial protein.
    proteins = {slot: initial_protein for slot in range(B_width)}

    # Start the search
    find_possibilities(previous_location)

    # Take the top protein as best protein. The former unused
    # "energy_min = protein_min.energy" lookup was dropped: it dereferenced
    # the protein before the check below could report a missing result.
    protein_min = proteins[0]

    if protein_min:
        return protein_min, energy_counter, matrix_sizes
    else:
        exit("Error: No protein 'protein_min' to return")
def csv_loader(path, filename):
    '''
    Load a .csv file and turn its rows into protein objects.

    Args:
        path: Prefix that is concatenated directly in front of ``filename``
            (no separator is inserted).
        filename: Name of the file being loaded.
    Returns:
        list: One ``Protein(row[0], row[1])`` per row of the file, keeping
            only the proteins whose ``get_splitted_ec_number()`` is not
            empty.
    '''
    frame = pd.read_csv(path + filename)
    candidates = (Protein(row[0], row[1]) for row in frame.values)
    # Proteins with an empty splitted EC number are dropped; per the original
    # note this takes care of e.g. 'n2'-style numbers, which have the 'n'
    # removed.
    return [
        candidate for candidate in candidates
        if len(candidate.get_splitted_ec_number()) > 0
    ]
def construct_protein_list(file_name):
    """Build a list of ``Protein`` objects from a ';'-delimited table.

    The file is read with ``genfromtxt(..., names=True)``, so its first row
    must hold the column names; the columns ``pdb``, ``azole`` and
    ``azole_group`` are used.

    Args:
        file_name: Path (or anything ``genfromtxt`` accepts) of the table.

    Returns:
        list: One ``Protein`` per data row, in file order.
    """
    from numpy import atleast_1d

    input_data = genfromtxt(file_name, dtype=None, delimiter=';', names=True)
    # genfromtxt collapses a file with a single data row into a 0-d array,
    # which cannot be iterated; atleast_1d restores the row dimension.
    rows = atleast_1d(input_data)
    return [
        Protein(id=b2str(row['pdb']),
                azole=b2str(row['azole']),
                azole_group=str(row['azole_group']))
        for row in rows
    ]
def add_acids(protein: Protein, start: int, end: int):
    '''
    Re-add the acids with indices strictly between ``start`` and ``end``
    that were previously removed.

    Three situations are distinguished:
    * both anchors exist (``start >= 0`` and ``end`` within the protein):
      walk from the start acid and pass the end acid's location on as well;
    * only the start anchor exists (the tail was cut off): walk from the
      start acid without an end location;
    * no start anchor (the head was cut off): walk backwards from the end
      acid, so the index list is reversed.
    '''
    missing_indices = list(range(start + 1, end))
    end_location = None

    if start >= 0:
        # The end location is only known when the end acid is still inside
        # the protein.
        if end <= protein.length - 1:
            end_location = protein.get_acid_index(end).location
        current_location = protein.get_acid_index(start).location
    else:
        missing_indices.reverse()
        current_location = protein.get_acid_index(end).location

    # Add acids recursively
    _add_acids(protein, missing_indices, end_location, current_location, 0)
def main():
    """Build the protein from the command line and apply a fixed test fold.

    When the protein has more than six entries in ``current_option``,
    entries 1 to 6 are overwritten with a hard-coded sequence of moves.
    An ``Option`` object is then created for the protein length; iterating
    over the options is not implemented here.
    """
    # checks whether program is used correctly
    check()

    # makes user input into the protein class
    protein = Protein(argv[1])

    # folds protein for testing
    if len(protein.current_option) > 6:
        test_moves = ("up", "up", "left", "left", "down", "down")
        for position, move in enumerate(test_moves, start=1):
            protein.current_option[position] = move

    # checks whether current option is better than all previous ones
    # (the loop over the options was left commented out in the original)
    options = Option(protein.length)
def __init__(self, sequence, maxScore=0, **kwargs):
    """Set up the interaction flags and add the three child widgets.

    Args:
        sequence: Passed to ``Protein(sequence)``.
        maxScore: Second argument of ``ScoreCounter`` (the first is 0).
        **kwargs: Forwarded to the parent class constructor.
    """
    super(Fold2D, self).__init__(**kwargs)

    # Interaction flags
    self.do_scale = self.do_rotation = False
    self.translation_touches = 2
    self.translationLock = self.reverseLock = False

    # Children are created and added one after another: background first,
    # then the protein, then the score counter.
    self.bg = Background(20)
    self.add_widget(self.bg)

    self.protein = Protein(sequence)
    self.add_widget(self.protein)

    self.scoreCounter = ScoreCounter(0, maxScore)
    self.add_widget(self.scoreCounter)

    self.win = False
def read_pdb(self, pdb):
    """Replace the current contents with the chains read from a PDB file.

    ATOM records are collected into ``Protein`` chains (a new chain starts
    whenever the chain id changes or no protein chain is open yet); HETATM
    records are collected into ``Polymer`` chains, one per residue number /
    chain id combination. A TER record closes the current chain id and
    reading stops at the first ENDMDL record.

    Args:
        pdb: Path of the PDB file to read.
    """
    self.clear()
    chain_id = '-'
    res_num = None
    res_insert = ' '
    is_last_chain_protein = False

    # The file is closed deterministically; it used to be left open.
    with open(pdb, 'r') as pdb_file:
        for line in pdb_file:
            if line.startswith("ATOM"):
                atom = AtomFromPdbLine(line)
                if not is_last_chain_protein or chain_id != atom.chain_id:
                    protein = Protein()
                    protein.id = atom.chain_id
                    chain_id = protein.id
                    self.append_chain(protein)
                    is_last_chain_protein = True
                    res_num = None
                if (res_num != atom.res_num) or (res_insert != atom.res_insert):
                    residue = Residue(atom.res_type, atom.chain_id,
                                      atom.res_num, atom.res_insert)
                    residue.chain_id = chain_id
                    protein.append_residue_no_renum(residue)
                    res_num = atom.res_num
                    res_insert = atom.res_insert
                protein.insert_atom(-1, atom)

            if line.startswith("HETATM"):
                atom = AtomFromPdbLine(line)
                if res_num != atom.res_num or chain_id != atom.chain_id:
                    mol = Polymer()
                    residue = Residue(atom.res_type, atom.chain_id, atom.res_num)
                    residue.chain_id = atom.chain_id
                    mol.append_residue_no_renum(residue)
                    mol.id = atom.chain_id
                    self.append_chain(mol)
                    res_num = atom.res_num
                    chain_id = atom.chain_id
                    # NOTE(review): the original assigned an unused local
                    # "last_chain_is_polymer = False" here; possibly
                    # "is_last_chain_protein = False" was intended. The dead
                    # assignment was removed, behaviour is unchanged.
                mol.insert_atom(0, atom)

            if line.startswith("TER"):
                chain_id = '-'

            if line.startswith("ENDMDL"):
                break
def dock():
    """Return a docking affinity as a JSON response.

    The handler currently sleeps for one second and answers with the fixed
    affinity -7.5. Everything after that first ``return`` is unreachable and
    is kept only as the intended implementation.
    """
    time.sleep(1)
    return make_response(jsonify({"affinity": -7.5}))

    # ---- unreachable below this line ----
    # NOTE(review): the path is built from request data without validation;
    # confirm the values are sanitised before this code is re-enabled.
    data = json.loads(request.data)
    directory = "proteins/" + str(data['protein']) + "/Structures/" + str(
        data['structure'])

    p1 = None
    for entry in os.listdir(directory):
        # Only configuration files are used; this skips .ds-store and other
        # unrelated files.
        if '.txt' in entry or '.conf' in entry:
            p1 = Protein(directory + "/" + str(entry))

    affinity = Setup.dock(p1, str(app.instance_path) + str(data['ligand']))
    return make_response(jsonify({"affinity": affinity}))
def read_uniprot_sequence():
    '''
    Parse 'uniprot_sprot.dat' and collect the records that carry an EC number.

    For every record whose description contains 'EC=', the sequence and the
    description are printed. Each EC number is handled the first time it is
    seen: it is printed and paired with the first accession of that record.

    Returns:
        connection_array: list of [ec_number, accession] pairs.
        protein_array: list of Protein(ec_number, accession) objects, in the
            same order.
    '''
    file = 'uniprot_sprot.dat'
    protein_array = []
    connection_array = []
    # A set gives O(1) "already seen" checks; the former list was never
    # returned or used for anything else.
    seen_ec_numbers = set()

    # The handle is closed once parsing has finished (it used to stay open).
    with open(file) as handle:
        for record in SwissProt.parse(handle):
            if 'EC=' not in record.description:
                continue

            # sequence is the string of the primary sequence
            # given by the markers of the residues
            print(record.sequence)
            # description consists of ';' separated parts
            print(record.description)

            for token in record.description.split(';'):
                if 'EC=' not in token:
                    continue
                parts = token.split('=')  # split header
                ec_number = parts[1].split(' ')[0]  # drop additional content
                if ec_number in seen_ec_numbers:
                    continue
                # NOTE(review): all three actions below are taken only for
                # EC numbers not seen before; confirm that one entry per EC
                # number (rather than per record) is what is wanted.
                print('EC: ', ec_number)  # print EC number as a string
                seen_ec_numbers.add(ec_number)
                connection_array.append([ec_number, record.accessions[0]])
                protein_array.append(Protein(ec_number, record.accessions[0]))

    return connection_array, protein_array
def read_pdb(self, pdb):
    """Replace the current contents with the chains read from a PDB file.

    ATOM records are collected into ``Protein`` chains (a new chain starts
    whenever the chain id changes or no protein chain is open yet); HETATM
    records are collected into ``Polymer`` chains, one per residue number /
    chain id combination. A TER record closes the current chain id and
    reading stops at the first ENDMDL record.

    Args:
        pdb: Path of the PDB file to read.
    """
    self.clear()
    chain_id = '-'
    res_num = None
    res_insert = ' '
    is_last_chain_protein = False

    # The file is closed deterministically; it used to be left open.
    with open(pdb, 'r') as pdb_file:
        for line in pdb_file:
            if line.startswith("ATOM"):
                atom = AtomFromPdbLine(line)
                if not is_last_chain_protein or chain_id != atom.chain_id:
                    protein = Protein()
                    protein.id = atom.chain_id
                    chain_id = protein.id
                    self.append_chain(protein)
                    is_last_chain_protein = True
                    res_num = None
                if (res_num != atom.res_num) or (res_insert != atom.res_insert):
                    residue = Residue(atom.res_type, atom.chain_id,
                                      atom.res_num, atom.res_insert)
                    residue.chain_id = chain_id
                    protein.append_residue_no_renum(residue)
                    res_num = atom.res_num
                    res_insert = atom.res_insert
                protein.insert_atom(-1, atom)

            if line.startswith("HETATM"):
                atom = AtomFromPdbLine(line)
                if res_num != atom.res_num or chain_id != atom.chain_id:
                    mol = Polymer()
                    residue = Residue(atom.res_type, atom.chain_id, atom.res_num)
                    residue.chain_id = atom.chain_id
                    mol.append_residue_no_renum(residue)
                    mol.id = atom.chain_id
                    self.append_chain(mol)
                    res_num = atom.res_num
                    chain_id = atom.chain_id
                    # NOTE(review): the original assigned an unused local
                    # "last_chain_is_polymer = False" here; possibly
                    # "is_last_chain_protein = False" was intended. The dead
                    # assignment was removed, behaviour is unchanged.
                mol.insert_atom(0, atom)

            if line.startswith("TER"):
                chain_id = '-'

            if line.startswith("ENDMDL"):
                break
def setupProtein(proteinToUse, structure):
    """Locate a protein's structure folders and build a Protein for each.

    The ``proteins`` directory (relative to the working directory) is
    searched case-insensitively for ``proteinToUse``. Every sub-folder of
    its ``Structures`` directory that contains a ``.txt`` or ``.conf`` file
    gets a ``Protein`` built from that configuration file.

    Args:
        proteinToUse: Name of the protein folder to look for.
        structure: Not used by the visible part of this function.

    Raises:
        SystemExit: With status 0 when no matching protein folder exists.
    """
    # Start from None so the "not found" check below works; the name used to
    # be unbound in that case and the check itself crashed.
    directory = None

    # searching through the proteins directory to find the protein the user
    # wants to dock to (when several names match, the last one wins)
    for i in os.listdir("proteins"):
        if i.lower() == proteinToUse.lower():
            directory = "proteins/" + i + "/Structures"
            print("We have found the directory and it is for the protein --> " + i)

    if directory is None:
        print("Couldn't find directory")
        # should return to the API with null or something if the protein
        # files don't exist
        raise SystemExit(0)

    # now searching through and creating protein array
    for i in os.listdir(directory):
        structure_dir = directory + "/" + i
        # Plain files such as .DS_Store cannot be listed; skip them.
        if not os.path.isdir(structure_dir):
            continue

        extension = None
        for el in os.listdir(structure_dir):
            if '.txt' in el or '.conf' in el:
                extension = el

        # folders without a configuration file are ignored
        if extension is not None:
            # directory for the conf.txt
            prtdir = structure_dir + "/" + str(extension)
            p1 = Protein(prtdir, structure_dir + "/", i)
def __init__(self, trajfile, indexfile, distance_criteria, outputfile,
             thickness, simplethickness, insertion, printnatoms):
    """Instantiate a Trajectory object and check the consistency of the
    input arguments.

    Requires: trajfile indexfile outputfile thickness insertion
    Ensures: the input arguments are assigned to the attributes and the
    index file has been loaded through loadIndex().

    Raises:
        IOError: when thickness and simplethickness are both given, when
            thickness or insertion have the wrong number of fields, when
            the index file lacks atoms in a required group, or when none of
            the analyses was requested.
    """
    self._trajfile = trajfile
    self._indexfile = indexfile
    self._distance_criteria = distance_criteria
    # Any falsy value (e.g. an empty string) is stored as None.
    self._outputfile = outputfile if outputfile else None
    self._printnatoms = printnatoms
    self._thickness = thickness
    self._simplethickness = simplethickness

    if thickness and simplethickness:
        raise IOError(
            'Incompatible arguments: simplethickness and thickness.')

    if thickness:
        self._thicknessOutput1 = ''
        self._thicknessOutput2 = ''
        nargs_thickness = len(thickness)
        if nargs_thickness < 2:
            raise IOError('The thickness argument should have at least 2'
                          ' fields (the window size and step)')
        elif nargs_thickness > 5:
            raise IOError('The thickness argument should have at most 5 '
                          'fields (the window size, step, minimum and '
                          'and maximum values)')
    elif simplethickness:
        self._thicknessOutput = ''

    self._insertion = insertion
    if insertion:
        self._insertionOutput = ''
        nargs_insertion = len(insertion)
        if insertion[0] == 'closest' or \
           insertion[0] == 'average':
            if nargs_insertion == 1:
                self._insertion_window = insertion[0]
            else:
                # Single-string print call: valid on Python 2 and 3 alike.
                print('Warning: Extra arguments have been '
                      'submitted and will be ignored')
        elif insertion[0] == 'zero':
            if nargs_insertion == 2:
                self._insertion_window = insertion[0]
            elif nargs_insertion == 1:
                raise IOError(
                    'Cutoff missing. The center of the '
                    'membrane requires the definition of a cutoff '
                    'beyond which bulk properties are assumed.')
            else:
                print('Warning: Extra arguments have been '
                      'submitted and will be ignored')
        else:
            if nargs_insertion < 2:
                raise IOError('The insertion argument requires '
                              'at least 2 fields (window_size and step)')
            elif nargs_insertion > 5:
                raise IOError('The insertion argument should '
                              'have at most 5 fields')

    self._curtime = None
    self._box = None

    self._protein = Protein()
    self._CoI = Protein()
    self._membrane = Membrane()

    self.loadIndex()

    # Count by iterating, as before, so any iterable return type works.
    proteinCounter = sum(1 for _ in self._protein.getAtomsNumbers())
    coiCounter = sum(1 for _ in self._CoI.getAtoms())

    if self._insertion and coiCounter < 1:
        raise IOError('The provided index file should have at least one '
                      'atom belonging to the Center_of_Interest group')
    elif self._thickness and proteinCounter < 1:
        raise IOError('The provided index file should have at least one '
                      'atom belonging to the Protein group')

    top_memb_size = len(self._membrane.getLeafletAtoms('one'))
    bottom_memb_size = len(self._membrane.getLeafletAtoms('two'))
    if top_memb_size < 1 or bottom_memb_size < 1:
        raise IOError('The provided index file should have at least one '
                      'atom in both Monolayer1 and '
                      'Monolayer2 groups')

    if not insertion and not (thickness or simplethickness):
        raise IOError('This script can calculate thickness and insertion '
                      'provided you use the -thickness or -insertion '
                      'arguments respectively')
class Trajectory:
    """Drive the insertion / thickness analysis of a trajectory file.

    The object owns the protein, centre-of-interest and membrane atom
    groups (filled from the index file), reads the trajectory frame by
    frame and accumulates the text of each output file in memory before
    writing it.
    """

    def __init__(self, trajfile, indexfile, distance_criteria, outputfile,
                 thickness, simplethickness, insertion, printnatoms):
        """Instantiate a Trajectory object and check the consistency of the
        input arguments.

        Requires: trajfile indexfile outputfile thickness insertion
        Ensures: the input arguments are assigned to the attributes and the
        index file has been loaded through loadIndex().

        Raises:
            IOError: when thickness and simplethickness are both given,
                when thickness or insertion have the wrong number of
                fields, when the index file lacks atoms in a required
                group, or when none of the analyses was requested.
        """
        self._trajfile = trajfile
        self._indexfile = indexfile
        self._distance_criteria = distance_criteria
        # Any falsy value (e.g. an empty string) is stored as None.
        self._outputfile = outputfile if outputfile else None
        self._printnatoms = printnatoms
        self._thickness = thickness
        self._simplethickness = simplethickness

        if thickness and simplethickness:
            raise IOError(
                'Incompatible arguments: simplethickness and thickness.')

        if thickness:
            self._thicknessOutput1 = ''
            self._thicknessOutput2 = ''
            nargs_thickness = len(thickness)
            if nargs_thickness < 2:
                raise IOError('The thickness argument should have at least 2'
                              ' fields (the window size and step)')
            elif nargs_thickness > 5:
                raise IOError('The thickness argument should have at most 5 '
                              'fields (the window size, step, minimum and '
                              'and maximum values)')
        elif simplethickness:
            self._thicknessOutput = ''

        self._insertion = insertion
        if insertion:
            self._insertionOutput = ''
            nargs_insertion = len(insertion)
            if insertion[0] == 'closest' or \
               insertion[0] == 'average':
                if nargs_insertion == 1:
                    self._insertion_window = insertion[0]
                else:
                    # Single-string print call: valid on Python 2 and 3.
                    print('Warning: Extra arguments have been '
                          'submitted and will be ignored')
            elif insertion[0] == 'zero':
                if nargs_insertion == 2:
                    self._insertion_window = insertion[0]
                elif nargs_insertion == 1:
                    raise IOError(
                        'Cutoff missing. The center of the '
                        'membrane requires the definition of a cutoff '
                        'beyond which bulk properties are assumed.')
                else:
                    print('Warning: Extra arguments have been '
                          'submitted and will be ignored')
            else:
                if nargs_insertion < 2:
                    raise IOError('The insertion argument requires '
                                  'at least 2 fields (window_size and step)')
                elif nargs_insertion > 5:
                    raise IOError('The insertion argument should '
                                  'have at most 5 fields')

        self._curtime = None
        self._box = None

        self._protein = Protein()
        self._CoI = Protein()
        self._membrane = Membrane()

        self.loadIndex()

        # Count by iterating, as before, so any iterable return type works.
        proteinCounter = sum(1 for _ in self._protein.getAtomsNumbers())
        coiCounter = sum(1 for _ in self._CoI.getAtoms())

        if self._insertion and coiCounter < 1:
            raise IOError('The provided index file should have at least one '
                          'atom belonging to the Center_of_Interest group')
        elif self._thickness and proteinCounter < 1:
            raise IOError('The provided index file should have at least one '
                          'atom belonging to the Protein group')

        top_memb_size = len(self._membrane.getLeafletAtoms('one'))
        bottom_memb_size = len(self._membrane.getLeafletAtoms('two'))
        if top_memb_size < 1 or bottom_memb_size < 1:
            raise IOError('The provided index file should have at least one '
                          'atom in both Monolayer1 and '
                          'Monolayer2 groups')

        if not insertion and not (thickness or simplethickness):
            raise IOError('This script can calculate thickness and insertion '
                          'provided you use the -thickness or -insertion '
                          'arguments respectively')

    def getInsertionOutput(self):
        """Return the insertion output text accumulated so far."""
        return self._insertionOutput

    def analyseTrajectory(self):
        """Run the requested analyses on every frame and write the results.

        For each frame yielded by loadTrajectory() the insertion, thickness
        and/or simple thickness are computed and stored with saveOutput();
        after the last frame every output file is written.
        """

        def createOutputFile(filename):
            """Return the output name for ``filename`` after deleting any
            existing file of that name."""
            outputname = self.getOutputName(filename)
            # Best effort, like the "rm -f" shell call this replaces: a
            # missing or unremovable file is not an error at this point.
            try:
                os.remove(outputname)
            except OSError:
                pass
            return outputname

        traj = self.loadTrajectory()

        if self._insertion:
            outputnameInsertion = createOutputFile("insertion")

        if self._thickness:
            outputnameThicknessTop = createOutputFile("thicknessTop")
            outputnameThicknessAvg1 = createOutputFile("thicknessTop_avg")
            outputnameThicknessBottom = createOutputFile("thicknessBottom")
            outputnameThicknessAvg2 = createOutputFile("thicknessBottom_avg")

        if self._simplethickness:
            outputnameThickness = createOutputFile("thickness")

        # loadTrajectory yields None once per frame; the frame data lives in
        # the atom groups, self._box and self._curtime.
        for frame in traj:
            if self._insertion:
                # Calculate geometric center of Center_of_Interest
                self._CoI.calcCenter()

                if 'zero' == self._insertion[0]:
                    # Calculate the Membrane Half Z
                    self._membrane.calcHalfMembraneZ(
                        self._protein, (0, 0, 0, 0, self._insertion[1]),
                        self._box)
                else:
                    # Choose leaflet
                    self._membrane.chooseClosestLeaflet(
                        self._CoI, self._box, self._distance_criteria)

                # Calculate insertion
                insertion = self._CoI.getInsertion(self._membrane,
                                                   self._insertion,
                                                   self._box,
                                                   outputnameInsertion, self)

                # "args" is the module-level parsed command line.
                if args.printclosestleaflet:
                    insertion = '{0} {1}'.format(
                        insertion, self._membrane._closestLeaflet)

                # Save to Output
                self.saveOutput(outputnameInsertion, insertion)

            if self._thickness:
                # Calculate the Membrane Half Z
                self._membrane.calcHalfMembraneZ(self._protein,
                                                 self._thickness, self._box)

                # Attribution of the Protein atoms to membrane
                # leaflets ('bottom' and 'top')
                self._CoI.calcAtomsClosestML(self._membrane)

                # Calculate the Thickness for ML1
                thicknessTop = self._membrane.getThickness(
                    self._CoI, 'top', self._box, self._thickness,
                    outputnameThicknessTop, self._printnatoms)

                # Calculate the Thickness for ML2
                thicknessBottom = self._membrane.getThickness(
                    self._CoI, 'bottom', self._box, self._thickness,
                    outputnameThicknessBottom, self._printnatoms)

                self._CoI.clearLeafletAtoms()

                # Save the Outputs
                self.saveOutput(outputnameThicknessTop, thicknessTop)
                self.saveOutput(outputnameThicknessBottom, thicknessBottom)

            if self._simplethickness:
                # Calculate the Membrane Thickness
                thickness = self._membrane.getSimpleThickness(
                    outputnameThickness)

                # Save the Outputs
                self.saveOutput(outputnameThickness, thickness)

        # Write to Output
        if self._insertion:
            self.writeOutput(outputnameInsertion)

        if self._thickness:
            self.writeOutput(outputnameThicknessTop)
            self.writeOutput(outputnameThicknessBottom)

            avgs_top, windows_top,\
                avgs_bottom, windows_bottom = self._membrane.calcThicknessAvg()
            self.writeAvgOutput(outputnameThicknessAvg1, avgs_top,
                                windows_top)
            self.writeAvgOutput(outputnameThicknessAvg2, avgs_bottom,
                                windows_bottom)

        if self._simplethickness:
            self.writeOutput(outputnameThickness)

    def loadIndex(self):
        """Read the index file and register the atom numbers per group.

        A line containing both '[ ' and ' ]' starts a group; only the
        groups protein, center_of_interest, monolayer1 and monolayer2
        (compared case-insensitively, spaces removed) are used, atoms of
        any other group are ignored.
        """
        known_groups = ('protein', 'center_of_interest', 'monolayer1',
                        'monolayer2')

        with open(self._indexfile) as f:
            addTo = None
            for line in f:
                line = line.strip()
                if '[ ' in line and ' ]' in line:
                    indexName = line.replace('[', '').replace(']', '')
                    indexName = indexName.replace(' ', '').lower()
                    addTo = indexName if indexName in known_groups else None
                elif addTo:
                    for atomNumber in line.split():
                        if addTo == 'protein':
                            self._protein.addAtom(atomNumber)
                        elif addTo == 'center_of_interest':
                            self._CoI.addAtom(atomNumber)
                        elif addTo == 'monolayer1':
                            self._membrane.addAtom(atomNumber, 'one')
                        elif addTo == 'monolayer2':
                            self._membrane.addAtom(atomNumber, 'two')

    def loadTrajectory(self):
        """Generator that reads the trajectory file frame by frame.

        ATOM lines update the properties of the atoms registered in the
        protein, centre-of-interest or membrane groups, CRYST1 lines set
        the box and TITLE lines set the current time (the number after
        't='). At every TER line the groups are checked against the index
        and the generator yields (no value) so the caller can analyse the
        frame.

        Raises:
            IOError: at a TER line, when the CoI or protein atoms read do
                not match the index file.
        """

        def readLine(line):
            """Slice atom type, residue and coordinates from an ATOM line."""
            atype = line[12:16].strip()
            residue = line[23:26]
            x = float(line[30:38])
            y = float(line[38:46])
            z = float(line[46:54])
            return atype, residue, x, y, z

        proteinAtoms = self._protein.getAtomsNumbers()
        CoIAtoms = self._CoI.getAtomsNumbers()
        membraneAtoms = self._membrane.getAtomsNumbers()

        with open(self._trajfile) as f:
            for line in f:
                if line[0:4] == 'ATOM':
                    number = line[4:11].strip()

                    # An atom may be in the protein group and in one of the
                    # other groups, hence the separate "if" below.
                    if number in proteinAtoms:
                        atype, residue, x, y, z = readLine(line)
                        self._protein.addProperties(number, atype, residue,
                                                    x, y, z)

                    if number in CoIAtoms:
                        atype, residue, x, y, z = readLine(line)
                        self._CoI.addProperties(number, atype, residue,
                                                x, y, z)
                    elif number in membraneAtoms:
                        atype, residue, x, y, z = readLine(line)
                        self._membrane.addProperties(number, atype, residue,
                                                     x, y, z)

                elif line[0:6] == 'CRYST1':
                    fields = line.split()
                    box_x = float(fields[1])
                    box_y = float(fields[2])
                    box_z = float(fields[3])
                    self._box = box_x, box_y, box_z

                elif line[0:5] == 'TITLE':
                    line = line.strip()
                    time = line.split('t=')[1].split()[0]
                    self._curtime = int(float(time))

                elif line[0:3] == 'TER':
                    if not self._CoI.IndexandTrajAtomsMatch():
                        raise IOError(
                            'Index file not correct. CoI group atoms in the index do '
                            'not match the trajectory file')
                    if not self._protein.IndexandTrajAtomsMatch():
                        raise IOError(
                            'Index file not correct. Protein group atoms in the index do '
                            'not match the trajectory file')
                    yield

    def getOutputName(self, prefix):
        """Return '<outputfile>_<prefix>.xvg', or '<prefix>.xvg' when no
        output file name was given."""
        if self._outputfile:
            return '{0}_{1}.xvg'.format(self._outputfile, prefix)
        return '{0}.xvg'.format(prefix)

    def saveOutput(self, outputname, data):
        """Format ``data`` and append it to the matching in-memory output.

        ``data`` is a space separated string. Unless it starts with 'time',
        the line is prefixed with the current time; a '\\n' field starts a
        new line that is prefixed with the time as well. The target buffer
        is chosen from the last '_'-separated part of ``outputname``.
        Thickness lines whose NaN count equals (number of fields - 2) // 3
        are treated as "all NaN" and are not stored.
        """
        if data[:4] == 'time':
            line = ''
        else:
            line = '{0:9f} '.format(self._curtime)

        fields = data.split(' ')
        nNaNs = 0
        for value in fields:
            if value == '\n':
                line = '{0}\n{1:9f}\t'.format(line, self._curtime)
            else:
                line = '{0}{1:5s} '.format(line, value)

            if value == 'NaN':
                nNaNs += 1

        # Explicit floor division keeps the Python 2 integer semantics of
        # the original "/" when the file is run under Python 3.
        all_nan_count = (len(fields) - 2) // 3

        data_type = outputname.split('_')[-1].replace('.xvg', '')
        if data_type == 'insertion':
            self._insertionOutput += line + '\n'
        elif data_type == 'thicknessTop':
            # If all NaNs don't save the data
            if nNaNs != all_nan_count:
                self._thicknessOutput1 += line + '\n'
        elif data_type == 'thicknessBottom':
            # If all NaNs don't save the data
            if nNaNs != all_nan_count:
                self._thicknessOutput2 += line + '\n'
        elif data_type == 'thickness':
            self._thicknessOutput += line + '\n'

    def writeOutput(self, outputname):
        """Write the accumulated output that belongs to ``outputname``.

        Raises:
            ValueError: when the output type cannot be derived from the
                name (raised before the file is opened, so nothing is
                truncated).
        """
        data_type = outputname.split('_')[-1].replace('.xvg', '')
        if data_type == 'insertion':
            data = self._insertionOutput
        elif data_type == 'thicknessTop':
            data = self._thicknessOutput1
        elif data_type == 'thicknessBottom':
            data = self._thicknessOutput2
        elif data_type == 'thickness':
            data = self._thicknessOutput
        else:
            raise ValueError(
                'Unknown output type: {0}'.format(data_type))

        with open(outputname, 'w') as f:
            if len(data) == 0:
                f.write('No occurrences in this monolayer\n')
            else:
                f.write(data)

    def writeAvgOutput(self, outputname, avgs, windows):
        """Write one '<window> <average>' line per window to ``outputname``;
        a placeholder line is written when there are no windows."""
        text = ''.join(
            '{0:9} {1:9}\n'.format(windows[i], avgs[i])
            for i in range(len(windows)))

        if len(text) == 0:
            text = 'No occurrences in this monolayer\n'

        with open(outputname, 'w') as f:
            f.write(text)
# Script body: validate the command-line paths, then run the network and the
# binding-site extraction for every protein listed in the dataset file.
args = parse_args()

for required_path in (args.dataset_file, args.protein_path, args.model_path):
    if not os.path.exists(required_path):
        raise IOError('%s does not exist.' % required_path)

if not os.path.exists(args.output):
    os.makedirs(args.output)

with open(args.dataset_file, 'r') as f:
    lines = f.readlines()

# One protein name per line. Stripping whitespace (instead of cutting the
# last character) keeps a final line without a trailing newline intact and
# copes with "\r\n" line endings; blank lines are skipped.
protein_names = [line.strip() for line in lines if line.strip()]

for prot in protein_names:
    protein = Protein(os.path.join(args.protein_path, prot + '.pdb'),
                      args.protonate, args.expand, args.f, args.output,
                      args.discard_points)

    # NOTE(review): the network and the extractor are rebuilt for every
    # protein although their arguments do not change; confirm they hold no
    # per-protein state before hoisting them out of the loop.
    nn = Network(args.model_path, args.model, args.voxel_size)
    lig_scores = nn.get_lig_scores(protein, args.batch)

    extractor = Bsite_extractor(args.T)
    extractor.extract_bsites(protein, lig_scores)
def greedy(protein_string, look_aheads, N_tries, dimension, matrix_size):
    '''
    Runs a Greedy look-ahead algorithm in which N_tries proteins are created
    from a string of amino acid types by greedy_fold(). The best
    conformation, the one that has the lowest energy, is saved and returned
    along with a dictionary of all energy counts and the minimal matrix
    sizes for the folded protein.

    Returns the tuple (protein_min, energy_counter, matrix_sizes); exits the
    program with an error message when no protein was stored (for example
    when N_tries is 0).
    '''
    # Create a protein object with a specific matrix size
    protein = Protein(matrix_size, dimension)

    # Place the first two amino acids
    protein.place_first_two(protein_string)

    energy_min = 1
    # Stays None until a fold improves on energy_min; it used to be unbound
    # in that case, so the final check raised NameError instead of reaching
    # the error exit.
    protein_min = None
    energy_counter = {}
    matrix_sizes = {}

    # Try to fold N_tries protein greedy like
    for i in range(N_tries):
        print(f"{i + 1}th protein folded")

        # Keep folding until greedy_fold reports a complete solution.
        while True:
            # Remove acids until only the first two are left
            while protein.length > 2:
                protein.remove_acid(0)

            solution_found, protein = greedy_fold(protein, protein_string,
                                                  look_aheads)
            if solution_found:
                break

        # A protein was created: save its energy
        energy = protein.energy

        # When its energy is lower than lowest energy found, save the protein
        if energy < energy_min:
            energy_min = energy
            protein_min = copy.deepcopy(protein)
            print(f"New minimum energy found: {energy_min}")

        # Update the dictionary for histogram of solutions
        energy_counter[energy] = energy_counter.get(energy, 0) + 1

        # Determine the smallest matrix size needed for this protein
        min_matrix_size = protein.smallest_matrix()
        sizes_for_energy = matrix_sizes.setdefault(energy, {})
        sizes_for_energy[min_matrix_size] = sizes_for_energy.get(
            min_matrix_size, 0) + 1

    if protein_min:
        return protein_min, energy_counter, matrix_sizes
    else:
        exit("Error: No protein 'protein_min' to return")
def Loadpdb(pdb=None, hetatm=True, verbose=False):
    """Split the records of a PDB file into molecule objects.

    ATOM records are grouped into Protein, Lipid or Ligand objects according
    to the residue name (looked up in Mol_types); HETATM records are grouped
    into Ligand objects, one per residue number / chain combination. A new
    molecule is started when the chain or the molecule type changes, after
    every ligand residue, and at a residue change following a TER / END /
    ENDMDL record with an unchanged chain id (as in multi-frame files).
    ATOM records named OXT are skipped.

    Args:
        pdb: Iterable of PDB lines (e.g. an open file handle).
        hetatm: HETATM records are only read when this equals True.
        verbose: Print the number of molecules and the type of each one.

    Returns:
        dict: Molecule.molid -> deep copy of the molecule object, with
        resids, nor and (for non-ligands with gaps) chain_break filled in.
        Empty when the input holds no atom records.

    Exits the program (sys.exit) when no handle is passed or when a residue
    name is not listed in Mol_types.
    """
    # Explicit check instead of an assert, which disappears under "python -O".
    if pdb is None:
        sys.exit("**No filehandle passed**. Pass a filehandle (to a pdb file) as an argument to Loadpdb.")

    AtomNumber = 0      # Atom indices, assigned in the order atoms are listed in the input
    mol_data = {}       # Key: molid; Value: molecule object
    mol = None          # Molecule currently being filled; None until the first atom record
    first_res = True    # True when the next atom starts a new molecule
    Prev_res = 0        # to keep track of residue change
    Prev_chain = 'aa'   # to keep track of chain change
    atmTohet = True     # To determine the transition from ATOM to HETATM records
    frame_tag = ''      # Last TER/END/ENDMDL tag seen since the current molecule started

    # Load the PDB structure file
    for line in pdb:
        if line[0:4] == "ATOM" and line[12:16].upper().strip() not in ["OXT"]:
            AtomNumber += 1
            AtomName, ResName, Chain, ResNo, CordX, CordY, CordZ, Occ, Bfac = Pdbcordsec(line)
            atm = Atom(AtomName, AtomNumber, ResName, Chain, ResNo, CordX, CordY, CordZ)

            # Check for unrecognized residue and new molecule
            if not first_res:
                if ResName.lower() not in Mol_types['protein'] + Mol_types['lipid'] + Mol_types['ligand']:
                    print("*** Unrecognized residue name: " + ResName + " ***.\nAdded %s as Ligand." % ResName)
                    sys.exit("In file configstruc.py: Add missing residue name(" + ResName + ") to appropriate molecule in Mol_types")

                if ResNo != Prev_res or (Prev_chain != Chain and mol.molecule_type().lower() == 'ligand'):
                    if mol.molecule_type().lower() == 'ligand':
                        mol_data[Molecule.molid] = deepcopy(mol)  # copy mol object into dictionary
                        first_res = True
                    elif Prev_chain != Chain or ResName.lower() not in Mol_types[mol.molecule_type().lower()]:
                        # Either Chain is different or New residue doesn't belong to current molecule type
                        mol_data[Molecule.molid] = deepcopy(mol)  # copy mol object into dictionary
                        first_res = True
                    elif frame_tag.lower() in ['endmdl', 'ter', 'end'] and Prev_chain == Chain:
                        # Different molecule (of same molecule type) with same chain id; as in trajectory frames
                        mol_data[Molecule.molid] = deepcopy(mol)  # copy mol object into dictionary
                        first_res = True

            if first_res:
                # Initialize mol for new chain or molecule
                if ResName.lower() in Mol_types['protein']:
                    mol = Protein()
                elif ResName.lower() in Mol_types['lipid']:
                    mol = Lipid()
                elif ResName.lower() in Mol_types['ligand']:
                    mol = Ligand()
                else:
                    print("*** Unrecognized residue name: " + ResName + " ***.\n Cannot initialize Molecule object.")
                    sys.exit("In file configstruc.py: Add missing residue name(" + ResName + ") to appropriate molecule in Mol_types")
                first_res = False
                frame_tag = ''

            mol.AddToResidue(atom=atm, occ=Occ, bfac=Bfac)
            mol.atmidx.append(AtomNumber)
            Prev_res = ResNo
            Prev_chain = Chain

        elif line[0:6] == "HETATM" and hetatm == True:
            if atmTohet:
                # First HETATM record: close the molecule built from the
                # ATOM records, if there is one.
                if mol is not None:
                    mol_data[Molecule.molid] = deepcopy(mol)  # copy mol object into dictionary
                first_res = True
                atmTohet = False

            AtomNumber += 1
            AtomName, ResName, Chain, ResNo, CordX, CordY, CordZ, Occ, Bfac = Pdbcordsec(line)
            atm = Atom(AtomName, AtomNumber, ResName, Chain, ResNo, CordX, CordY, CordZ)

            # Check for new ligand molecule
            if not first_res and (ResNo != Prev_res or Prev_chain != Chain):
                mol_data[Molecule.molid] = deepcopy(mol)  # copy mol object into dictionary
                first_res = True

            # Initialize mol for new chain or molecule
            if first_res:
                if ResName.lower() in Mol_types['ligand']:
                    mol = Ligand()
                else:
                    print("*** Unrecognized residue name: " + ResName + " ***.\n Cannot initialize Ligand object.")
                    sys.exit("In file configstruc.py: Add missing residue name (" + ResName + ") to ligand molecule in Mol_types")
                first_res = False

            mol.AddToResidue(atom=atm, occ=Occ, bfac=Bfac)
            mol.atmidx.append(AtomNumber)
            Prev_res = ResNo
            Prev_chain = Chain

        elif line[0:3].lower() in ["ter", "end"] or line[0:6].lower() == "endmdl":
            frame_tag = line[0:3]

    # Append the last mol object to mol_data; input without any atom record
    # leaves nothing to append (this used to raise NameError).
    if mol is not None:
        mol_data[Molecule.molid] = deepcopy(mol)  # copy mol object into dictionary

    # The messages below are single pre-formatted strings so they print the
    # same text as the former Python 2 print statements on Python 2 and 3.
    if verbose:
        print("Number of molecules in input file:  %s \n" % (len(mol_data),))

    # Update mol_data[molid].nor, mol_data[molid].resids, and check for chain
    # breaks in non-ligand molecules
    for key in sorted(mol_data):
        if verbose:
            print("Molid: %s Molecule_Type: %s" % (key, mol_data[key].molecule_type()))
        mol_data[key].resids = sorted(mol_data[key].residue)
        mol_data[key].nor = len(mol_data[key].resids)

        if mol_data[key].molecule_type().lower() != 'ligand':
            # Consecutive residue ids differ by 1; the ids then span exactly
            # nor values, anything else means a gap.
            resids_diff = numpy.array(mol_data[key].resids[1:]) - numpy.array(mol_data[key].resids[:-1])
            if mol_data[key].nor != (numpy.sum(resids_diff) + 1):
                break_indices = numpy.where(resids_diff > 1)
                print("Chain break encountered in molecule %s at residue positions: " % (key,))
                positions = " ".join(str(mol_data[key].resids[res]) for res in break_indices[0])
                print((positions + " " if positions else "") + "\n")
                mol_data[key].chain_break = True

    return mol_data
def Loadpdb(pdb=None, hetatm=True, verbose=False): try: assert (pdb != None) #Check if filehandle to PDB file is passed except AssertionError: sys.exit( "**No filehandle passed**. Pass a filehandle (to a pdb file) as an argument to Loadpdb. " ) AtomNumber = 0 #Keeps track of atom indices (assigned in the order atoms listed i input file) mol_data = { } #Key: molid; Keep track of different molecules (different chains or molecule type) in input structure check_het = False #To keep track of new Hetero residue first_res = True #To identify molecule type of every molecule in input structure and accordingly define Molecule object. Prev_res = 0 # to keep track of residue change in HETATM section; a new Molecule object is assigned for every residue. Prev_chain = 'a' # to keep track of chain change in HETATM section '''Load the PDB structure file''' for line in pdb: if line[0:4] == "ATOM" and line[12:16].upper().strip() not in ["OXT"]: AtomNumber += 1 AtomName, ResName, Chain, ResNo, CordX, CordY, CordZ, Occ, Bfac = Pdbcordsec( line) atm = Atom(AtomName, AtomNumber, ResName, Chain, ResNo, CordX, CordY, CordZ) if first_res: #Initialize mol for new chain or molecule if ResName.lower() in Mol_types['protein']: mol = Protein() elif ResName.lower() in Mol_types['ligand']: mol = Ligand() else: print "*** Unrecognized residue name: " + ResName + " ***.\n Cannot initialize Molecule object." 
sys.exit( "In file configstruc.py: Add missing residue name(" + ResName + ") to appropriate molecule in Mol_types") first_res = False mol.AddToResidue(atom=atm, occ=Occ, bfac=Bfac) mol.atmidx.append(AtomNumber) elif line[0:3] == "TER": mol_data[Molecule.molid] = deepcopy( mol) #copy mol object into dictionary first_res = True # mol object will be initialized to molecule type of next molecule elif line[0:6] == "HETATM" and hetatm == True: AtomNumber += 1 AtomName, ResName, Chain, ResNo, CordX, CordY, CordZ, Occ, Bfac = Pdbcordsec( line) atm = Atom(AtomName, AtomNumber, ResName, Chain, ResNo, CordX, CordY, CordZ) #Check for new molecule if ( ResNo != Prev_res or Prev_chain != Chain ) and check_het == True: #For first HETATM check_het is always False mol_data[Molecule.molid] = deepcopy( mol) #copy mol object into dictionary first_res = True #Initialize mol for new chain or molecule if first_res: if ResName.lower() in Mol_types['ligand']: mol = Ligand() else: print "*** Unrecognized residue name: " + ResName + " ***.\n Cannot initialize Ligand object." 
sys.exit( "In file configstruc.py: Add missing residue name (" + ResName + ") to ligand molecule in Mol_types") first_res = False mol.AddToResidue(atom=atm, occ=Occ, bfac=Bfac) mol.atmidx.append(AtomNumber) if Prev_res == 0: check_het = True Prev_res = ResNo Prev_chain = Chain if hetatm: #If HETATM record was added; append the last hetero residue object to mol_data mol_data[Molecule.molid] = deepcopy( mol) #copy mol object into dictionary if verbose: print "Number of molecules in input file: ", len(mol_data), "\n" #Update mol_data[molid].nor, mol_data[molid].resids, and check for chain breaks in non-ligand molecules for key in sorted(mol_data): if verbose: print "Molid:", key, "Molecule_Type:", mol_data[key].molecule_type( ) if mol_data[key].molecule_type().lower() != 'ligand': mol_data[key].resids = sorted(mol_data[key].residue) mol_data[key].nor = len(mol_data[key].resids) #Check for chain breaks in protein resids_diff = numpy.array(mol_data[key].resids[1:]) - numpy.array( mol_data[key].resids[:-1]) if mol_data[key].nor != (numpy.sum(resids_diff) + 1): break_indices = numpy.where(resids_diff > 1) print "Chain break encountered in molecule", key, "at residue positions: " for res in break_indices[0]: print mol_data[key].resids[res], print "\n" return mol_data
class Fold2D(ScatterPlane): lockGrowingPeptide = False lockSelectingResidue = False def __init__(self, sequence, maxScore=0, **kwargs): super(Fold2D, self).__init__(**kwargs) self.do_scale=False self.do_rotation=False self.translation_touches=2 self.translationLock=False self.reverseLock=False self.bg = Background(20) self.add_widget(self.bg) self.protein = Protein(sequence) self.add_widget(self.protein) self.scoreCounter = ScoreCounter(0,maxScore) self.add_widget(self.scoreCounter) self.win=False def remove(self): self.protein.remove() self.remove_widget(self.scoreCounter) def on_touch_down(self, touch): super(Fold2D, self).on_touch_down(touch) if self.protein.toolBar.collide_point(touch.x,touch.y) and not self.reverseLock: self.protein.reverseSequence() self.reverseLock=True def on_touch_move(self, touch): super(Fold2D, self).on_touch_move(touch) if multitouch and self.translationLock: return self.lockGrowingPeptide = True self.protein.placeAA((touch.x,touch.y)) self.scoreCounter.setScore(self.protein.score) def on_touch_up(self, touch): super(Fold2D, self).on_touch_up(touch) if multitouch and self.translationLock: return if not self.lockGrowingPeptide and not self.reverseLock: self.protein.select((touch.x,touch.y)) self.lockGrowingPeptide = False self.reverseLock=False self.scoreCounter.setScore(self.protein.score) print self.scoreCounter.score, self.scoreCounter.anticipatedScore if self.scoreCounter.score >= self.scoreCounter.anticipatedScore: self.win=True #self.parent.back() def transform_with_touch(self, touch): if not multitouch: return if len(self._touches) == self.translation_touches: self.translationLock=True dx = (touch.x - self._last_touch_pos[touch][0]) \ * self.do_translation_x dy = (touch.y - self._last_touch_pos[touch][1]) \ * self.do_translation_y dx = dx / self.translation_touches dy = dy / self.translation_touches changed = True self.protein.translate(dx,dy) else: self.translationLock=False def apply_transform(self, trans, post_multiply=False, 
anchor=(0, 0) ): return def update(self, dt): pass
def main(argv=None):  # IGNORE:C0111
    '''Parse the command line, compute titration curves and write the results.

    Reads the interaction, background and desolvation files from the input
    folder, builds a Protein from them, runs get_titration_curves, appends the
    elapsed seconds to timing.txt in the output folder and hands the curves to
    create_output.  With --dump-state a state.txt file is opened in the output
    folder and passed to get_titration_curves.

    argv -- extra arguments; when given they are appended to sys.argv before
            parsing.

    Returns 0, also when interrupted from the keyboard (the interrupt is
    re-raised instead when DEBUG is set).
    '''
    if argv is None:
        argv = sys.argv
    else:
        sys.argv.extend(argv)

    program_name = os.path.basename(sys.argv[0])
    program_version = "v%s" % __version__
    program_build_date = str(__updated__)
    program_version_message = '%%(prog)s %s (%s)' % (program_version,
                                                     program_build_date)
    program_shortdesc = __import__('__main__').__doc__.split("\n")[1]
    # NOTE(review): the line breaks inside this literal could not be recovered
    # from the whitespace-collapsed source; restore the original layout if the
    # help text was meant to span several lines.
    program_license = '''%s Created by Kyle Monson on %s. Copyright 2015 Pacific Northwest National Laboratory. All rights reserved. Licensed under the Apache License 2.0 http://www.apache.org/licenses/LICENSE-2.0 Distributed on an "AS IS" basis without warranties or conditions of any kind, either express or implied. USAGE ''' % (program_shortdesc, str(__date__))

    try:
        # Setup argument parser
        parser = ArgumentParser(description=program_license,
                                formatter_class=RawDescriptionHelpFormatter)
        parser.add_argument(
            "-v", "--verbose", dest="verbose", action="count",
            help="set verbosity level [default: %(default)s]")
        parser.add_argument('-V', '--version', action='version',
                            version=program_version_message)
        parser.add_argument(dest="input", help="path to input folder",
                            metavar="input_path")
        parser.add_argument(dest="output", help="paths to output folder",
                            metavar="output_path")
        parser.add_argument(
            "--test", action='store_true', default=False,
            help="Run basic sanity tests using selected input.")
        parser.add_argument("--dump-state", action='store_true', default=False,
                            help="Dump state to state.txt in output.")

        # Process arguments
        args = parser.parse_args()

        input_path = args.input
        output_path = args.output
        # action="count" leaves the value at None when -v is not given;
        # normalise so the comparisons below are always int against int.
        verbose = args.verbose or 0
        dump_state = args.dump_state

        if verbose > 0:
            print("Verbose mode on")

        try:
            if verbose > 0:
                print("Creating output directory")
            os.makedirs(output_path)
        except os.error:
            if verbose > 0:
                print("Output directory already exists.")

        interaction_filepath = os.path.join(input_path,
                                            INTERACTION_BASE_FILENAME)
        background_filepath = os.path.join(input_path,
                                           BACKGROUND_BASE_FILENAME)
        desolvation_filepath = os.path.join(input_path,
                                            DESOLVATION_BASE_FILENAME)

        with open(interaction_filepath) as interaction_file, \
                open(background_filepath) as background_file, \
                open(desolvation_filepath) as desolvation_file:
            protein = Protein(interaction_file, desolvation_file,
                              background_file)

        state_file = None
        if dump_state:
            state_file = open(os.path.join(output_path, "state.txt"), 'w')

        # Close the state dump even when the computation raises, so the file
        # handle is not leaked and whatever was written gets flushed.
        try:
            start = datetime.now()
            curves = get_titration_curves(protein.protein_complex, state_file)
            end = datetime.now()
        finally:
            if state_file is not None:
                state_file.close()

        delta = end - start
        delta_seconds = delta.total_seconds()

        with open(os.path.join(output_path, "timing.txt"), 'a') as timing_file:
            timing_file.write(str(delta_seconds) + '\n')

        create_output(output_path, curves)
        #pprint(dict(curves))

        if args.test:
            import tests
            #tests.test_normalize(protein)
            #tests.test_stuff(protein)
            #tests.test_adding_ph(protein)

        return 0
    except KeyboardInterrupt:
        ### handle keyboard interrupt ###
        if DEBUG:
            # Bare raise keeps the original traceback
            raise
        return 0
from protein import Protein, trypsin
from measurement import read_mgf
import pandas as pd
from multiprocessing import Pool

# One-letter amino-acid sequences used as digestion targets.
# (presumably ovalbumin, lysozyme and bovine serum albumin, going by the
# names; confirm against the data source)
OVA = "GSIGAASMEFCFDVFKELKVHHANENIFYCPIAIMSALAMVYLGAKDSTRTQINKVVRFDKLPGFGDSIEAQCGTSVNVHSSLRDILNQITKPNDVYSFSLASRLYAEERYPILPEYLQCVKELYRGGLEPINFQTAADQARELINSWVESQTNGIIRNVLQPSSVDSQTAMVLVNAIVFKGLWEKAFKDEDTQAMPFRVTEQESKPVQMMYQIGLFRVASMASEKMKILELPFASGTMSMLVLLPDEVSGLEQLESIINFEKLTEWTSSNVMEERKIKVYLPRMKMEEKYNLTSVLMAMGITDVFSSSANLSGISSAESLKISQAVHAAHAEINEAGREVVGSAEAGVDAASVSEEFRADHPFLFCIKHIATNAVLFFGRCVSP"
LYS = "KVFGRCELAAAMKRHGLDNYRGYSLGNWVCAAKFESNFNTQATNRNTDGSTDYGILQINSRWWCNDGRTPGSRNLCNIPCSALLSSDITASVNCAKKIVSDGNGMNAWVAWRNRCKGTDVQAWIRGCRL"
BSA = "DTHKSEIAHRFKDLGEEHFKGLVLIAFSQYLQQCPFDEHVKLVNELTEFAKTCVADESHAGCEKSLHTLFGDELCKVASLRETYGDMADCCEKQEPERNECFLSHKDDSPDLPKLKPDPNTLCDEFKADEKKFWGKYLYEIARRHPYFYAPELLYYANKYNGVFQECCQAEDKGACLLPKIETMREKVLTSSARQRLRCASIQKFGERALKAWSVARLSQKFPKAEFVEVTKLVTDLTKVHKECCHGDLLECADDRADLAKYICDNQDTISSKLKECCDKPLLEKSHCIAEVEKDAIPENLPPLTADFAEDKDVCKNYQEAKDAFLGSFLYEYSRRHPEYAVSVLLRLAKEYEATLEECCAKDDPHACYSTVFDKLKHLVDEPQNLIKQNCDQFEKLGEYGFQNALIVRYTRKVPQVSTPTLVEVSRSLGKVGTRCCTKPESERMPCTEDYLSLILNRLCVLHEKTPVSEKVTKCCTESLVNRRPCFSALTPDETYVPKAFDEKLFTFHADICTLPDTEKQIKKQTALVELLKHKPKATEEQLKTVMENFVAFVDKCCAADDKEACFAVEGPKLVVSTQTALA"

# LYS bridges (each line pairs two segments of LYS)
# VFGRCELAAA + WIRGCRL
# GNWVCAAKFE + WRNRCKGTDV
# SRWWCNDGRT + CNIPCSALLS
# SRNLCNIPCS + ASVNCAKKIV

# Digest LYS with trypsin and load the measurements from the MGF file.
protein = Protein(LYS)
peptides = list(protein.digest(trypsin))
measurements = list(read_mgf("../data/mgf/190318_LYS_RAT_50x_05.mgf"))

# Protein Pilot
# Paragon
# AA tags from the spectrum

# Error tolerances in parts per million.
soft_err_ppm = 50
hard_err_ppm = 10

result = []
# Pair every peptide with its absolute tolerance: hard_err_ppm millionths of
# the peptide's total_mz.
peps_with_threshold = [(pep, (hard_err_ppm / 1e6) * pep.total_mz) for pep in peptides]

# Walk the measurements, reporting progress every 500 items.
for i, m in enumerate(measurements):
    if i % 500 == 0:
        print(f"Done: {i}")
def dock(self): #setup tables with distances, errors and weights expDistances = [] expErrors = [] weights = [] constraintNames = [] for restraint in self.restraints: constraintNames.append( "%s-%s" % (restraint["anchorAname"], restraint["anchorBname"])) expDistances.append(restraint["distance"]) expErrors.append(restraint["width"]) weights.append(restraint["weight"]) expDistances = numpy.array(expDistances) expErrors = numpy.array(expErrors) weights = numpy.array(weights) # setup Proteins anchorAcoords = [] for restraint in self.restraints: anchorAcoords.append(restraint["anchorAcoord"]) proteinAname = self.restraints[0]["proteinAname"] anchorAcalphas = self.restraints[0]["proteinAcalpha"] proteinA = Protein(anchorAcoords, anchorAcalphas, proteinAname) anchorBcoords = [] for restraint in self.restraints: anchorBcoords.append(restraint["anchorBcoord"]) proteinBname = self.restraints[0]["proteinBname"] anchorBcalphas = self.restraints[0]["proteinBcalpha"] proteinB = Protein(anchorBcoords, anchorBcalphas, proteinBname) # move both to origin proteinA.moveToOrigin(proteinA.labelAtomsCog) proteinB.moveToOrigin(proteinB.labelAtomsCog) ####### # evolve ####### zeroChromosome = Chromosome("None") zeroChromosome.genes = numpy.array([0, 0, 0, 0, 0, 0]) # setup populations print "setting up populations..." 
populations = [] for i in range(0, self.numberOfPopulations): if self.symmetry == "C2": population = Population(self.numberOfChromosomes, "C2") elif self.symmetry == "None": population = Population(self.numberOfChromosomes, "None") population.name = "%i" % (i + 1) populations.append(population) # put them into an environment and evolve environment1 = Environment(populations, proteinA, proteinB, expDistances, expErrors, weights, False, False, False) environment1.constraintNames = constraintNames #environment1.applySelectionPressure() #for population in environment1.populations: # population.log += population.chromosomes[0].printChromosomeWithoutClashes() self.processes = [] resultQueue = multiprocessing.Queue() progressQueue = multiprocessing.Queue() numberOfProcesses = len(environment1.populations) for idx, population in enumerate(environment1.populations): environment = copy.deepcopy(environment1) p = multiprocessing.Process(target=self.worker1, args=(environment, idx, resultQueue, progressQueue)) p.start() self.processes.append(p) cycles = 0 maxCycles = numberOfProcesses * (self.numberOfGenerations + self.numberOfRigidBodyCycles) #while True: # cycles += progressQueue.get() # progress = cycles/(numberOfProcesses*(self.numberOfGenerations+self.numberOfRigidBodyCycles)) # #send message to main thread # wx.CallAfter(pub.sendMessage, "docking.update", progress=progress) # if cycles >= maxCycles: # break resultsList = [resultQueue.get() for p in self.processes] for p in self.processes: p.join() environment1.populations = resultsList # create solutions print "" print "Solutions:" nonClashingSolution = 1 clashingSolution = 1 for population in environment1.populations: #createPseudoatom(self.labelPositionsProteinB, "tmpSolution-labels", 1) tmpProtein = Protein(proteinB.originalLabelAtoms, proteinB.originalLabelAtoms, "tmpSolution-labels") solution = population.chromosomes[0] # print solution.printChromosomeWithClashes() if solution.clashes <= 5: nameOfSolution = 
"%s-%i_sol-%i" % (self.objectPrefix, self.dockingRunNumber, nonClashingSolution) solution.name = nameOfSolution proteinB.moveInPymol(nameOfSolution, solution, 1) #tmpProtein.moveInPymol("%s-labels" % nameOfSolution, solution, 1) cmd.translate(list(proteinA.labelAtomsCog.reshape(-1, )), nameOfSolution, 1, 0, None) #cmd.translate(list(proteinA.labelAtomsCog.reshape(-1,)), "%s-labels" % nameOfSolution, 1, 0, None) nonClashingSolution += 1 elif solution.clashes > 5: nameOfSolution = "%s-%i_clash-%i" % ( self.objectPrefix, self.dockingRunNumber, clashingSolution) solution.name = nameOfSolution proteinB.moveInPymol(nameOfSolution, solution, 1) #tmpProtein.moveInPymol("%s-labels" % nameOfSolution, solution, 1) cmd.translate(list(proteinA.labelAtomsCog.reshape(-1, )), nameOfSolution, 1, 0, None) #cmd.translate(list(proteinA.labelAtomsCog.reshape(-1,)), "%s-labels" % nameOfSolution, 1, 0, None) clashingSolution += 1 cmd.group("%s-%i" % (self.objectPrefix, self.dockingRunNumber), "%s-%i*" % (self.objectPrefix, self.dockingRunNumber)) #cmd.set_view(myView) return environment1, self.settings
def random_walk(protein_string, N_tries, dimension, matrix_size):
    '''
    Run a random walk in which N_tries proteins are randomly created from a
    string of amino acid types.

    The conformation with the lowest energy is saved and returned along with
    a dictionary counting how often each energy occurred and a dictionary
    counting, per energy, the minimal matrix sizes of the folded proteins.

    Returns (protein_min, energy_counter, matrix_sizes).
    Raises SystemExit when no conformation was saved, e.g. when N_tries is 0.
    '''
    # Create a protein object with a specific matrix size
    protein = Protein(matrix_size, dimension)

    # Place the first two amino acids
    protein.place_first_two(protein_string)
    location = protein.last_acid

    energy_min = 1
    # Initialised up front so the final check works even if the loop never
    # saves a conformation
    protein_min = None
    energy_counter = {}
    matrix_sizes = {}

    # Try to fold N_tries proteins
    for i in range(N_tries):
        # Print an update for every 1000th protein
        if (i + 1) % 1000 == 0:
            print(f"{i + 1}th protein folded")

        # Reset to the first two acids and walk again until a complete
        # protein has been created
        solution_found = False
        while not solution_found:
            while protein.length > 2:
                protein.remove_acid(0)
            solution_found, protein = walk(protein, protein_string, location)

        energy = protein.energy

        # When its energy is the lowest energy yet, save the protein object
        if energy < energy_min:
            energy_min = energy
            protein_min = copy.deepcopy(protein)
            print(f"New minimum energy found: {energy_min}")

        # Add the energy to a dictionary counter
        energy_counter[energy] = energy_counter.get(energy, 0) + 1

        # Count the smallest matrix size needed for this protein, per energy
        min_matrix_size = protein.smallest_matrix()
        sizes_for_energy = matrix_sizes.setdefault(energy, {})
        sizes_for_energy[min_matrix_size] = sizes_for_energy.get(
            min_matrix_size, 0) + 1

    if protein_min is None:
        raise SystemExit("Error: No protein 'protein_min' to return")
    return protein_min, energy_counter, matrix_sizes
def dock(self): #setup tables with distances, errors and weights expDistances = [] expErrors = [] weights = [] constraintNames = [] for restraint in self.restraints: constraintNames.append("%s-%s"%(restraint["anchorAname"], restraint["anchorBname"])) expDistances.append(restraint["distance"]) expErrors.append(restraint["width"]) weights.append(restraint["weight"]) expDistances = numpy.array(expDistances) expErrors = numpy.array(expErrors) weights = numpy.array(weights) # setup Proteins anchorAcoords = [] for restraint in self.restraints: anchorAcoords.append(restraint["anchorAcoord"]) proteinAname = self.restraints[0]["proteinAname"] anchorAcalphas = self.restraints[0]["proteinAcalpha"] proteinA = Protein(anchorAcoords, anchorAcalphas, proteinAname) anchorBcoords = [] for restraint in self.restraints: anchorBcoords.append(restraint["anchorBcoord"]) proteinBname = self.restraints[0]["proteinBname"] anchorBcalphas = self.restraints[0]["proteinBcalpha"] proteinB = Protein(anchorBcoords, anchorBcalphas, proteinBname) # move both to origin proteinA.moveToOrigin(proteinA.labelAtomsCog) proteinB.moveToOrigin(proteinB.labelAtomsCog) # setup populations print "Starting..." 
populations = [] for i in range(0, self.numberOfPopulations): if self.symmetry != "None": population = Population(self.numberOfChromosomes, self.symmetry) elif self.symmetry == "None": population = Population(self.numberOfChromosomes, "None") population.name = "%i" % (i + 1) populations.append(population) # put them into an environment and evolve environment1 = Environment(populations, proteinA, proteinB, expDistances, expErrors, weights, self.scoreClashes) environment1.constraintNames = constraintNames self.processes = [] resultQueue = multiprocessing.Queue() progressQueue = multiprocessing.Queue() numberOfProcesses = len(environment1.populations) if os.name != "nt": for idx, population in enumerate(environment1.populations): environment = copy.deepcopy(environment1) p = multiprocessing.Process(target = self.worker, args = (environment, idx, resultQueue, progressQueue)) p.start() self.processes.append(p) cycles = 0 maxCycles = numberOfProcesses * (self.numberOfGenerations + self.numberOfRigidBodyCycles) while True: cycles += progressQueue.get() progress = cycles/(numberOfProcesses*(self.numberOfGenerations+self.numberOfRigidBodyCycles)) #send message to main thread wx.CallAfter(pub.sendMessage, "docking.update", progress=progress) if cycles >= maxCycles: break resultsList = [resultQueue.get() for p in self.processes] for p in self.processes: p.join() environment1.populations = resultsList else: print "Windows... Using 1 core." 
self.worker(environment1, -1, resultQueue, progressQueue) self.abort = False # name solutions nonClashingSolution = 1 clashingSolution = 1 for population in environment1.populations: solution = population.chromosomes[0] if solution.clashes <= 5: nameOfSolution = "%s-%i_sol-%i" % (self.objectPrefix, self.dockingRunNumber, nonClashingSolution) solution.name = nameOfSolution nonClashingSolution += 1 elif solution.clashes > 5: nameOfSolution = "%s-%i_clash-%i" % (self.objectPrefix, self.dockingRunNumber, clashingSolution) solution.name = nameOfSolution clashingSolution += 1 return environment1, self.settings
for atom2 in atom2atom[atom1]: res2 = atom2.residue if res2 != res1: try: res2res[res1].add(res2) except KeyError: res2res[res1] = set([res2]) ret = sum([len(res2res[x]) for x in res2res.keys()])/2, \ avg_coord_num(res2res) # print 'DEBUG: csuAvgCoordNum end. Returning '+str(ret) return ret #def picContactNumber(protein): # raise NotImplementedError if __name__ == "__main__": import sys if len(sys.argv) == 1 or '-h' in sys.argv: print "Usage: python coordNum.py pdbFileName1 [pdbFileName2 ...]" else: #print "Protein name\tContact Number (Marek)\tAvg Coord Number \ # (Marek)\tContact Number (CSU)\tAvg Coord Number (CSU)" for f in sys.argv[1:]: p = Protein(f) name = f.split('/')[-2].split('.')[0] marek = marek_avg_coord_num(p) csu = csu_avg_coord_num(p, total_layers = 14) print '%(n)5s %(ncm)3d %(cnm)6.3f %(ncc)3d %(cnc)6.3f' % \ {'n': name, 'ncm': marek[0], 'cnm': marek[1], \ 'ncc': csu[0], 'cnc': csu[1]}
# -*- coding: utf-8 -*- """ Created on Wed Feb 28 17:00:23 2018 @author: HOS """ from numpy import * from protein import Protein from grid import Grid import matplotlib.pyplot as plt N = 18 d = 15 Prt = Protein(d, N) Grid = Grid(d, N, Prt) #folding for i in range(30): Prt.tryRotate() Grid.update() #Grid.easyPlot() Grid.showOff()
#!/usr/bin/env python import sys import requests as sender from flask import Flask, abort, request import json import threading from protein import Protein # Globals app = Flask(__name__) p = Protein( "NC_000852,NC_007346,NC_008724,NC_009899,NC_014637,NC_020104,NC_023423,NC_023640,NC_023719,NC_027867" ) # Design # ------ # bio-engine (port 7000) web-app (ports 8000-) # | | # | (open port) # | <---- POST (sequence) -------- | # | ----- PUT (found protein) ---> | # | ----- PUT (found protein) ---> | # | ----- DELETE (finished) -----> | # | (close port) def match_provider(port, sequence): protein_codes = p.get_code_list()
def getStructures(name):
    """Return the result of Protein.getStructures(name), passed through jsonify and make_response."""
    structures = Protein.getStructures(name)
    payload = jsonify(structures)
    return make_response(payload)
def remove_acids(protein: Protein, cut_start: int, cut_end: int):
    '''
    Call protein.remove_acid_index for every index strictly between
    cut_start and cut_end, in ascending order. Both end points are left
    untouched.
    '''
    first_inside = cut_start + 1
    for position in range(first_inside, cut_end):
        protein.remove_acid_index(position)
import pickle from lxml import etree from protein import Protein # Goes through the Swissprot database xml and parses all the relevant data prefix = "{http://uniprot.org/uniprot}" proteins = [] for event, element in etree.iterparse("in/uniprot_sprot.xml", tag=prefix + "entry"): prot = Protein() nonEukaryotProtein = False for child in element.getchildren(): if nonEukaryotProtein: break # Gets Uniprot ID # The xml lists old IDs as well, only the first one is relevant and the rest are ignored if child.tag == prefix + "accession": if not prot.uniprot_id: prot.uniprot_id.add(child.text) # Gets names elif child.tag.endswith("protein"): for entry in child.getchildren(): if entry.tag == prefix + "recommendedName" or entry.tag == prefix + "alternativeName":
parser.add_argument('--f', type=int, default=10, help='parameter for the simplification of points mesh') parser.add_argument('--T', type=float, default=0.9, help='ligandability threshold') parser.add_argument('--batch', type=int, default=32, help='batch size') parser.add_argument('--voxel_size', type=float, default=1.0, help='size of voxel in angstrom') parser.add_argument('--protonate', action='store_true', help='whether to protonate or not the input protein') parser.add_argument('--expand', action='store_true', help='whether to expand on residue level the extracted binding sites') parser.add_argument('--discard_points', action='store_true', help='whether to output or not the computed surface points') return parser.parse_args() args = parse_args() if not os.path.exists(args.prot_file): raise IOError('%s does not exist.' % args.prot_file) if not os.path.exists(args.model_path): raise IOError('%s does not exist.' % args.model_path) if not os.path.exists(args.output): os.makedirs(args.output) prot = Protein(args.prot_file,args.protonate,args.expand,args.f,args.output, args.discard_points) nn = Network(args.model_path,args.model,args.voxel_size) lig_scores = nn.get_lig_scores(prot,args.batch) extractor = Bsite_extractor(args.T) extractor.extract_bsites(prot,lig_scores)