def parse_modification_rules(separator=' | '):
    """
    Reads the rules for adding modifications from the file at
    ADDING_MODIFICATION_RULES_PATH.

    Every line of the file is stripped and split on the separator; lines
    that yield fewer than 7 fields are skipped. Each remaining line becomes
    one rule dict describing how to add a single fragment.
    Keys in each rule dict:
    ['modification_name', 'original_base', 'remove', 'moved_link_atoms',
    'fixed_link_atoms', 'fragment_file_name', 'pdb_abbrev']
    The values of 'moved_link_atoms' and 'fixed_link_atoms' are lists
    obtained by splitting the field on ','.

    Arguments:
    * separator - field separator used in the rules file (by default ' | ')

    Returns a dict mapping the stripped modification name to the list of
    its rule dicts. If the file cannot be opened, a message is logged and
    an empty dict is returned.
    """
    try:
        infile = open(ADDING_MODIFICATION_RULES_PATH)
    except IOError:
        log.write_message('File does not exist: %s ' % ADDING_MODIFICATION_RULES_PATH)
        return {}

    rules = {}
    # The context manager closes the file even when parsing a line raises.
    with infile:
        for line in infile:
            fields = line.strip().split(separator)
            if len(fields) < 7:
                continue
            # Rules are grouped under the stripped name; the rule itself
            # keeps the name field exactly as it was read.
            mod_name = fields[0].strip()
            rule = {
                'modification_name': fields[0],
                'original_base': fields[1],
                'remove': fields[2],
                'moved_link_atoms': fields[3].split(','),
                'fixed_link_atoms': fields[4].split(','),
                'fragment_file_name': fields[5],
                'pdb_abbrev': fields[6],
            }
            rules.setdefault(mod_name, []).append(rule)
    return rules
def renumber_chain(self, start_residue='1'):
    """
    Changes chain numeration.
    Provides continuous chain numeration starting from the given number.

    Arguments:
    * start_residue - first number of the new numeration, anything int()
      accepts (by default '1')

    Raises ModernaStructureError if start_residue cannot be converted
    to an integer.
    """
    try:
        num = int(start_residue)
    except ValueError:
        raise ModernaStructureError(
            'Cannot start numeration with %s, requires number' % str(start_residue))

    self.sort_residues()
    renumbered = []
    # Iterate over a snapshot: residues are removed from the structure
    # inside the loop, and mutating a container while iterating over it
    # may skip elements or raise.
    for counter, resi in enumerate(list(self), num):
        self.remove_residue(resi.identifier)
        resi.change_number(str(counter))
        # Blank the third id field (presumably the insertion code - TODO confirm).
        resi.id = (resi.id[0], resi.id[1], ' ')
        renumbered.append(resi)

    for resi in renumbered:
        self.add_residue(resi)
    self.sort_residues()
    log.write_message(
        'Chain was renumbered. The first residue is now %s.' % str(start_residue))
def remove_modification(resi):
    """
    Strips the modification from a residue and logs the change.

    Arguments:
    * resi - residue to edit in place
    """
    name_before = resi.long_abbrev
    ModificationRemover().remove_modification(resi)
    # NOTE(review): the message uses resi.id, whereas the sibling helpers
    # in this file report resi.identifier - confirm whether that is intended.
    message = 'Residue %s: modification removed (%s ---> %s).' % (
        resi.id, name_before, resi.long_abbrev)
    log.write_message(message)
def get_sequence(structure):
    """*get_sequence(structure)*

    Retrieves the one-letter sequence from the coordinates of a structure
    (template or model).

    Standard RNA bases are written as upper case letters, DNA bases as
    lower case letters. Many modifications have a one-letter ASCII
    abbreviation (according to McCloskey); nucleotides without one are
    written as x. Nucleotides that cannot be recognized (e.g. base missing)
    are written as '.'.

    Residues are processed according to their numbers. Where an unusually
    long bond is found in the backbone between two bases, an additional "_"
    symbol is inserted to mark the discontinuity.

    The sequence is written to the log; a discontinuous sequence gets an
    extra log message.

    Also see http://www.genesilico.pl/modomics

    :Arguments:
        * structure - a Template or RnaModel object
    """
    # KR: sequence should be written to logfile.
    # KR: discontinuities should get an extra logfile message.
    # MM: is it good place for that or should be done in get_sequence in ModernaStructure?
    sequence = validate_structure(structure).get_sequence()
    log.write_message('Checking sequence: \n%s\n' % sequence.seq_with_modifications)
    discontinuous = '_' in sequence.seq_with_modifications
    if discontinuous:
        log.write_message('SEQUENCE IS DISCONTINUOUS !!!\n')
    return sequence
def check_matches(self, guide, apos, dqueue, result):
    """
    Reacts on matches and mismatches.

    Compares the guide letter with the template letter of the alignment
    position, appends a (target letter, chosen letter) pair to result,
    logs every case except an exact match, and advances both queues.

    Arguments:
    * guide - letter from the guide sequence
    * apos - alignment position (provides template_letter and target_letter)
    * dqueue - queue being processed (provides i_ali and next_both)
    * result - list collecting the pairs
    """
    template_letter = apos.template_letter
    target_letter = apos.target_letter

    # Each branch only selects the log text and the letter to keep;
    # logging and appending happen once below. The guide letter is kept
    # unless a branch says otherwise.
    note = None
    chosen = guide
    if template_letter.short_abbrev == ANY_RESIDUE:
        note = ".. incomplete template residue in alignment position %i (%s/%s) - alignment edited."
    elif guide.short_abbrev == ANY_RESIDUE:
        note = ".. unknown residue in alignment position %i (%s/%s) - alignment edited."
    elif guide.original_base != template_letter.original_base:
        note = ".. different nucleobase in alignment position %i (%s/%s) - please check manually."
        chosen = template_letter
    elif guide != template_letter and guide.original_base == template_letter.original_base:
        note = ".. different modified base found in alignment position %i (%s/%s) - alignment edited."
    elif guide == template_letter:
        pass  # exact match: nothing to report
    else:
        # there may be cases not covered - report and ignore them
        note = ".. don't know what to do about alignment position %i (%s/%s) - ignored."
        chosen = template_letter

    if note is not None:
        log.write_message(note % (dqueue.i_ali + 1, guide, template_letter))
    result.append((target_letter, chosen))
    dqueue.next_both()
def fix_template_seq(self, seq):
    """
    Adjusts the template sequence in the alignment to the given guide sequence.

    Does nothing when is_seq_fixable(seq) is false. Otherwise walks through
    the paired guide/alignment positions, delegates each one to check_gaps,
    check_breaks or check_matches, and stores the collected pairs with
    set_aligned_sequences. The sequences are logged before and after.

    Arguments:
    * seq - guide sequence
    """
    # validate input seq
    if not self.is_seq_fixable(seq):
        return

    log.write_message(
        "\nTemplate and alignment sequences differ - trying to fix small differences.\n")
    log.write_message("template : %s" % seq)
    log.write_message("alignment (before) : %s\n" % self.align.aligned_template_seq)

    # iterate through positions
    queue = PairQueue(self.align, seq)
    pairs = []
    while queue.has_more():
        guide, apos = queue.pair
        if apos.has_gap():
            handler = self.check_gaps
        elif guide == BREAK or apos.template_letter == BREAK:
            handler = self.check_breaks
        else:
            handler = self.check_matches
        handler(guide, apos, queue, pairs)

    self.set_aligned_sequences(pairs)
    log.write_message("\ntemplate : %s" % seq)
    log.write_message("alignment (after) : %s\n" % str(self.align))
def add_modification(resi, long_abbrev):
    """
    Adds the modification with the given abbreviation to a residue
    and logs the change.

    Arguments:
    * resi - residue to edit in place
    * long_abbrev - long abbreviation of the modification to add
    """
    name_before = resi.long_abbrev
    ModificationAdder().add_modification(resi, long_abbrev)
    message = 'Residue %s: modification added (%s ---> %s).' % (
        resi.identifier, name_before, long_abbrev)
    log.write_message(message)
def run_fccd(self, torsions=DEFAULT_TORSIONS, threshold=0.05, maxit=200):
    """
    Runs the FCCDLoopCloser algorithm between resi1 and resi2.

    A residue created by create_resi_from_torsions(torsions) is moved by
    the loop closer, gets the C4', C3' and C2' atoms of resi2 attached in
    place of its own, and then replaces resi2 in the structure. The result
    message, rmsd and iteration count are logged.

    Arguments:
    * torsions - passed to create_resi_from_torsions (by default DEFAULT_TORSIONS)
    * threshold - passed to FCCDLoopCloser.run_fccd (by default 0.05)
    * maxit - passed to FCCDLoopCloser.run_fccd (by default 200)
    """
    anchor_names = ("C4'", "C3'", "C2'")
    first = self.resi1
    second = self.resi2
    rebuilt = self.create_resi_from_torsions(torsions)

    fixed = [second[name] for name in anchor_names]
    moving = [first[name] for name in ("C4'", "C3'", "O3'")]
    moving += [rebuilt[name] for name in ("P", "O5'", "C5'") + anchor_names]

    closer = FCCDLoopCloser(moving, fixed)
    msg, rmsd, it = closer.run_fccd(threshold, maxit)
    closer.copy_vectors_to_atoms()

    # edit atoms that dont move: swap in the atoms of the second residue
    for name in anchor_names:
        rebuilt.detach_child(name)
    for name in anchor_names:
        rebuilt.add(second[name])

    # add moved residue to structure
    self.struc.remove_residue(second.identifier)
    self.struc.add_residue(rebuilt)
    # original author's note: the lookup gets a clone of the added residue
    self.resi2 = self.struc[rebuilt.identifier]

    log.write_message('.. result: %s' % msg)
    log.write_message(' rmsd: %6.3f iterations: %i' % (rmsd, it))
def check_letters_in_residue_numeration(self):
    """
    Checks whether identifiers of residues contain any letters.

    Returns True (and logs the first offending identifier) as soon as a
    residue identifier containing an ASCII letter is found, otherwise
    returns False.
    """
    for resi in self:
        # '[a-zA-Z]' matches letters only; the former class '[a-z,A-Z]'
        # also matched a literal comma.
        if re.search('[a-zA-Z]', resi.identifier):
            log.write_message(
                'Structure contains residues with letters in numeration eg.: %s.'
                % resi.identifier)
            return True
    return False
def remove_one_modification_copy(self, residue, number_in_model):
    """
    Adds a copy of a residue with its modification removed.

    Arguments:
    * residue - residue to copy (wrapped in a new RNAResidue)
    * number_in_model - number for the copy; when it is false/empty,
      the number of the copied residue is used
    """
    unmodified = RNAResidue(residue)
    target_number = number_in_model or unmodified.number
    remove_modification(unmodified)
    self.add_residue(unmodified, str(target_number))
    message = 'Residue %s: modification removed (%s ---> %s).' % (
        target_number, residue.long_abbrev, unmodified.long_abbrev)
    log.write_message(message)
def __init__(self, resi1, resi2, struc):
    """
    Stores the two residues and the structure they belong to. If the
    backbone between the residues is not intact, logs a message and
    runs build().

    Arguments:
    * resi1 - first residue
    * resi2 - second residue
    * struc - structure containing both residues
    """
    self.resi1 = resi1
    self.resi2 = resi2
    self.struc = struc
    if not self.is_intact():
        # Adjacent literals are joined at compile time, so the message no
        # longer depends on how a continuation line happens to be indented.
        log.write_message(
            'The backbone between residues %s and %s is not intact. '
            'Attempting rebuilding.' % (resi1.identifier, resi2.identifier))
        self.build()
def generate_lir_db(self):
    """
    Collects records for all structures listed in self.input_data.

    For every key of self.input_data the key is logged and the result of
    get_records_from_one_structure(key) is added to self.all_records.

    The original note described the data as
    { pdb_code : {chain_name : [residues_list] } }
    (layout not verifiable from this method alone).
    """
    for pdb_name in self.input_data.keys():
        log.write_message(pdb_name)
        self.all_records += self.get_records_from_one_structure(pdb_name)
def add_one_modification_copy(self, residue, modification_long_abbrev, number_in_model):
    """
    Adds a copy of a residue carrying the given modification.

    Arguments:
    * residue - residue to copy (wrapped in a new RNAResidue)
    * modification_long_abbrev - long abbreviation of the modification to add
    * number_in_model - identifier for the copy; when it is false/empty,
      the identifier of the copied residue is used
    """
    modified = RNAResidue(residue)
    target_number = number_in_model or modified.identifier
    add_modification(modified, modification_long_abbrev)
    self.add_residue(modified, target_number, False)
    message = 'Residue %s: modification added (%s ---> %s).' % (
        target_number, residue.long_abbrev, modification_long_abbrev)
    log.write_message(message)
def build_suites(self):
    """
    Tries different combinations of starting angles.

    Runs run_fccd with every entry of TORSIONS except the first, adds
    OP1/OP2 through PhosphateBuilder.add_op12 after each run, and stops
    as soon as the backbone is intact.
    """
    log.write_message(
        ".. trying %i different suite torsion combinations." % (len(TORSIONS) - 1))
    for suite in TORSIONS[1:]:
        self.run_fccd(torsions=suite, threshold=0.2, maxit=100)
        PhosphateBuilder(self.resi1, self.resi2).add_op12()
        if self.is_intact():
            break
def read_alignment(data, shrink=DEFAULT_SHRINK):
    """
    Loads an alignment from a file or from a FASTA string.

    Arguments:
    * data - path of an existing file, or a string starting with '>'
    * shrink - passed on to the parser (by default DEFAULT_SHRINK)

    Returns the alignment produced by RNAAlignmentParser.
    Raises AlignmentError when data is neither an existing file nor a
    string starting with '>'.
    """
    parser = RNAAlignmentParser()
    # An existing path takes precedence over interpreting data as FASTA text.
    if os.access(data, os.F_OK):
        load = parser.get_alignment_from_file
    elif data.startswith('>'):
        load = parser.get_alignment
    else:
        raise AlignmentError(
            'Alignment not in FASTA format or file does not exist: %s' % data)
    alignment = load(data, shrink)
    log.write_message('Alignment loaded from %s:%s' % (data, str(alignment)))
    return alignment
def fix_backbone(self):
    """
    Attempts to fix all backbone breaks.

    Walks through the residues twice and calls fix_backbone_between_resis
    on each pair of consecutive residues.
    """
    log.write_message('\nChecking structure for interrupted backbones:')
    # run twice to improve result
    # TODO: create better method for backbone fixing.
    for _round in range(2):
        previous = None
        for current in self:
            # Truthiness test kept on purpose: a falsy residue object is
            # treated like "no previous residue".
            if previous:
                self.fix_backbone_between_resis(previous, current)
            previous = current
    log.write_message('Checking and repairing backbones finished.')
def is_seq_fixable(self, seq):
    """
    Returns True if the guide sequence can be used for fixing.

    The answer is False when the template is already identical to the
    guide, or when guide and alignment template differ in length without
    breaks (the latter case is logged).

    Arguments:
    * seq - guide sequence
    """
    if self.is_template_identical(seq):
        return False
    template_seq = self.align.template_seq
    guide_length = len(seq.seq_without_breaks)
    template_length = len(template_seq.seq_without_breaks)
    same_length = guide_length == template_length
    if not same_length:
        log.write_message(
            "\nTemplate and alignment sequences differ in length - please check them manually.\n")
    return same_length
def build(self):
    """
    Runs procedures to construct backbone between two residues.

    Tries build_phosphate, build_fccd and build_suites in this order and
    stops after the first one that leaves the backbone intact. When none
    succeeds, the backbone status is written and a warning is logged.
    """
    for method in (self.build_phosphate, self.build_fccd, self.build_suites):
        method()
        if self.is_intact():
            log.write_message('Backbone reconstruction successful.')
            return

    self.write_bb_status()
    # Adjacent literals are joined at compile time, so the message no
    # longer depends on how a continuation line happens to be indented.
    log.write_message(
        'WARNING: Backbone reconstruction failed. '
        'The structure needs to be refined.\n')
def get_secstruc(structure):
    """*get_secstruc(structure)*

    Retrieves the dot-bracket secondary structure from the coordinates of
    a structure (template or model) and writes it to the log.

    Base pairs (only Watson-Crick pairings AU and GC, and GU Wobble pairs)
    are indicated by round brackets, all other residues by dots.

    :Arguments:
        * structure - a Template or RnaModel object
    """
    dot_bracket = structure.get_secstruc()
    log.write_message('Secondary structure: \n%s\n' % dot_bracket)
    return dot_bracket
def check_isostericity(self, bp1, bp2, interact_type, max_value=1.0):
    """
    Returns True if basepair1 is isosteric to basepair2
    when interaction type is interact_type.

    The value is looked up as self.matrices[bp1][interact_type][bp2];
    the pair counts as isosteric when a value exists and does not
    exceed max_value.

    Arguments:
    * bp1 - first base pair (string key)
    * bp2 - second base pair (string key)
    * interact_type - interaction type (string key)
    * max_value - highest accepted value (by default 1.0)

    Returns False (and logs a message) when the matrices hold no entry
    for the requested combination.
    """
    description = bp1 + "->" + bp2 + " (" + interact_type + ")"
    # Only the lookup is guarded, and only against lookup failures, so
    # unrelated errors are no longer silently swallowed.
    try:
        result = self.matrices[bp1][interact_type][bp2]
    except (LookupError, TypeError):
        log.write_message(
            "No information in IsostericityMatrices about: " + description)
        return False
    log.write_message(description)
    # Test for None first: comparing None with a number raises on Python 3.
    return result is not None and result <= max_value
def exchange_base(resi, new_name):
    """
    Exchanges the base in the given residue and logs the change.

    Arguments:
    * resi - residue to edit in place
    * new_name - new residue name (A, G, C or U)
    """
    name_before = resi.long_abbrev
    BaseExchanger().exchange_base(resi, new_name)
    message = 'Residue %s: base exchanged (%s ---> %s), residue added to model.' % (
        resi.identifier, name_before, new_name)
    log.write_message(message)
def write_secstruc(struct, file_name='secstruc.vienna'):
    """*write_secstruc(model, file_name='secstruc.vienna')*

    Writes the secondary structure to a vienna file and logs the file name.

    :Arguments:
        * Structure object (model or template)
        * name of the vienna file (optional; by default secstruc.vienna)
    """
    checked_struct = validate_structure(struct)
    checked_name = validate_filename(file_name)
    checked_struct.write_secstruc(checked_name)
    log.write_message('Secondary structure written to %s' % checked_name)
def copy_residue_backbone(self, residue, number_in_model=None, strict=True):
    """
    Adds a backbone-only copy of a residue.

    Arguments:
    * residue - residue to copy (wrapped in a new RNAResidue)
    * number_in_model - number for the copy; by default it is built from
      the second and third fields of the residue id
    * strict - passed on to add_residue (by default True)
    """
    backbone = RNAResidue(residue)
    if number_in_model:
        target_number = number_in_model
    else:
        target_number = str(backbone.id[1]).strip() + backbone.id[2].strip()
    make_backbone_only_residue(backbone)
    self.add_residue(backbone, target_number, strict=strict)
    log.write_message(
        'Residue %s: residues backbone atoms copied from template to model.'
        % target_number)
def find_fragment_candidates(self):
    """
    Returns a FragmentCandidates object.

    The candidates for self.query are created, scored first with the fast
    and then with the advanced scoring, and logged. A message is written
    when the best candidate has a score above 10000.
    """
    scoring = self.scoring
    found = FragmentCandidates(self.query)
    found.create_initial_fragment_set()

    scoring.set_fast_scoring()
    found.make_fast_scoring(scoring, self.candidates_number)
    scoring.set_advanced_scoring()
    found.make_advanced_scoring(scoring)

    self.log_candidates(found)
    best_is_poor = len(found) > 0 and found[0].score > 10000
    if best_is_poor:
        log.write_message(
            "\nNo fragment candidate with aproppriate secondary structure was found.\nThe next best fragment is inserted.\n")
    return found
def write_fragment_candidates(fragment_candidates, output_directory='fragment_candidates',
                              with_model=True, fragment_log_file=True):
    """*write_fragment_candidates(fragment_candidates, output_directory='fragment_candidates')*

    Writes a list of fragment candidates to a set of PDB files.
    The candidates are numbered according to the geometrical fit of
    their backbones.

    :Arguments:
        * fragment candidates list (obtained by the find_fragment command)
        * output directory name
        * with_model - passed on to the candidate writer (by default True)
        * fragment_log_file - passed on to the candidate writer (by default True)
    """
    candidates = validate_frag_candidate_list(fragment_candidates)
    target_dir = validate_path(output_directory)
    candidates.write_fragment_candidates(
        target_dir, True, with_model, False, fragment_log_file)
    log.write_message('Fragment candidates written to %s.' % target_dir)
def add_all_modifications_copy(self):
    """
    Adds modified copies of template residues for every entry of
    self.recipe.add_modifications.

    For each entry the template residue at its template position is
    copied, the modification of the target letter is added to the copy,
    and the copy is added and logged.

    Raises RnaModelError when the alignment or the template is missing.
    """
    if not (self.alignment and self.template):
        raise RnaModelError('There is no template or/and alignmnt')

    for ap in self.recipe.add_modifications:
        source = self.template.template_residues[str(ap.template_position)]
        modified = RNAResidue(source)
        name_before = modified.long_abbrev
        add_modification(modified, ap.target_letter.long_abbrev)
        self.add_residue(modified)
        log.write_message(
            'Residue %s: modification added (%s ---> %s).'
            % (modified.identifier, name_before, modified.long_abbrev))
def write_model(model, pdb_file_name='moderna_model.pdb'):
    """*write_model(model, pdb_file_name='moderna_model.pdb')*

    Writes a model to a PDB file and logs the file name.
    The residues in the file are sorted. All residues keep their numbers
    as last assigned. An RnaModel is refined before it is written.

    :Arguments:
        * Structure object (model or template)
        * name of the PDB file (optional; by default moderna_model.pdb)
    """
    checked_model = validate_structure(model)
    checked_name = validate_filename(pdb_file_name)
    # Exact class comparison: only RnaModel itself is refined.
    needs_refinement = checked_model.__class__ == RnaModel
    if needs_refinement:
        checked_model.refine_model()
    checked_model.write_pdb_file(checked_name)
    log.write_message('Model written to %s' % checked_name)
def copy_residue(self, residue, number_in_model=None, strict=True):
    """
    Copies a residue to a model in the given position.

    The copy gets its B-factor set to B_FACTOR_COPY before it is added.

    Arguments:
    * residue - residue to copy (as a RNAResidue or PDB.Residue.Residue instance)
    * number_in_model - position in model (by default position in previous structure)
    * strict - passed on to add_residue (by default True)
    """
    duplicate = RNAResidue(residue)
    ResidueEditor().set_bfactor(duplicate, B_FACTOR_COPY)
    target_number = number_in_model or duplicate.identifier
    self.add_residue(duplicate, target_number, strict=strict)
    message = 'Residue %s: residue copied from template residue %s to model.' % (
        target_number, duplicate.identifier)
    log.write_message(message)
def check_breaks(self, guide, apos, dqueue, result):
    """
    Reacts on underscores in either of the sequences.

    * break in both: the (target, template) pair is kept, both queues advance
    * break only in the guide: a (GAP, guide) pair is added, the guide advances
    * break only in the alignment: it is dropped, the alignment advances

    Arguments:
    * guide - letter from the guide sequence
    * apos - alignment position (provides template_letter and target_letter)
    * dqueue - queue being processed
    * result - list collecting the pairs
    """
    template_letter = apos.template_letter
    target_letter = apos.target_letter

    if not (guide == BREAK):
        log.write_message(
            ".. break in alignment in position %i is not in template - ignored."
            % (dqueue.i_ali + 1))
        dqueue.next_ap()
        return

    if template_letter == BREAK:
        result.append((target_letter, template_letter))
        dqueue.next_both()
        return

    log.write_message(
        ".. break in template in position %i added to alignment."
        % (dqueue.i_guide + 1))
    result.append((GAP, guide))
    dqueue.next_guide()
def examine_structure(st, ex_log=None, verbose=True):
    """*examine_structure(structure)*

    Checks whether the given structure has any features that may cause
    any problems during the modeling process.
    The user needs to give a structure object, and optionally a name of
    the file the report is written to.

    :Arguments:
        * Structure object
        * name of logfile (optional); without it the report goes to the log
        * verbose - when true, the report is also printed (by default True)

    Returns the PdbController holding the report.
    """
    struc = validate_structure(st)
    # Examine the validated structure; the result of the validation was
    # previously computed but the raw argument was examined instead.
    pc = PdbController(struc)
    if ex_log:
        pc.write_log(ex_log)
    else:
        log.write_message(str(pc))
    if verbose:
        print(pc)
    return pc