def align_strs_seq(env, segfile, alnfile, knowns, sequence, matrix_file, overhang=0, write_fit=False): """Align a single sequence with several structures""" # Read the sequences of structures from the specs in SEGFILE: aln = alignment(env, file=segfile, align_codes=knowns) # Only align structures if there's more than one: if len(aln) > 1: # do a multiple sequence alignment of structures: aln.malign(gap_penalties_1d=(-600, -400), overhang=overhang) # do a multiple structural alignment of structures: aln.malign3d(gap_penalties_3d=(0.0, 2.0), fit_atoms='CA', overhang=overhang, write_fit=write_fit) # remember the number of structures align_block = len(aln) # add the sequence of the unknown to the sequence/alignment arrays # containing the aligned structures: aln.append(file=segfile, align_codes=sequence) # align the last sequence with the fixed alignment of structures: aln.align(align_block=align_block, gap_penalties_1d=(-600, -400), overhang=overhang) # write the alignment to a file ALNFILE aln.write(file=alnfile) # do some sequence comparisons: aln.id_table(matrix_file) return aln
def sequence_srch(env, sequence, segfile, chains_list, toplib='${LIB}/top_heav.lib', search_randomizations=0, signif_cutoff=(4.0, 5.0)): """For a given target sequence, find all template chains in PDB. - align all of them together and write the alignment to a file. - calculate identity matrix and write it to a file. - calculate a dendrogram and write it to the .log file.""" penalties = (-600, -400) # This should be fine for significance tests with # default matrix as1.sim.mat overhang = 20 seqfile = '$(LIB)/pdball.pir' sdb = sequence_db(env, seq_database_file=seqfile, chains_list=chains_list, seq_database_format='PIR') aln = alignment(env, file=segfile, align_codes=sequence) sdb.search(aln, search_top_list=30, off_diagonal=9999, search_group_list='$(LIB)/pdb_40.grp', seq_database_file=seqfile, gap_penalties_1d=penalties, output='SHORT', signif_cutoff=signif_cutoff, search_randomizations=search_randomizations) if len(aln) > 1 or (len(aln) == 1 and aln[0].code != sequence): aln.write(file='alignment.tmp') aln.malign(overhang=overhang, off_diagonal=150, gap_penalties_1d=penalties) aln.malign3d(overhang=overhang, gap_penalties_3d=(0, 3), off_diagonal=150) blk = len(aln) aln.append(file=segfile, align_codes=sequence) # Only needed for ALIGN2D's PSA run env.libs.topology.read(file=toplib) aln.align2d(align_block=blk, gap_penalties_1d=(-450, 0), gap_penalties_2d=(0.35, 1.2, 0.9, 1.2, 0.6, 8.6, 1.2, 0, 0), max_gap_length=50, off_diagonal=150, overhang=overhang) aln.write(file=sequence + '.ali') aln.write(file=sequence + '.pap', alignment_format='PAP', alignment_features='HELIX BETA ACCESSIBILITY ' + \ 'STRAIGHTNESS CONSERVATION INDICES') mat = sequence + '.mat' aln.id_table(matrix_file=mat) env.dendrogram(matrix_file=mat, cluster_cut=-1.0)
def cluster(self, cluster_cut=1.5): """Cluster all output models, and output an optimized cluster average""" self.read_initial_model() aln = alignment(self.env) self.align_models(aln) if len(aln) == 0: log.error('cluster', 'No generated models - nothing to cluster!') aln.malign3d(gap_penalties_3d=(0, 3), fit=False) aln.append_model(mdl=self, align_codes='cluster', atom_files='cluster.opt') self.transfer_xyz(aln, cluster_cut=cluster_cut) self.write(file='cluster.ini') self.read_top_par() self.rd_restraints() self.create_topology(aln, sequence='cluster') atmsel = self._check_select_atoms() self.restraints.unpick_all() self.restraints.pick(atmsel) self.restraints.condense() edat = energy_data(copy=self.env.edat) edat.nonbonded_sel_atoms = 1 atmsel.energy(output='LONG', edat=edat) cg = conjugate_gradients() cg.optimize(atmsel, actions=actions.trace(5, 'cluster.deb'), max_iterations=self.max_var_iterations) atmsel.energy() self.write(file='cluster.opt') aln.compare_structures(fit=True)
def refine(self, atmsel, actions): """Refine the optimized model with MD and CG""" # Save the current model: if self.fit_in_refine != 'NO_FIT': self.write(file='TO_BE_REFINED.TMP') # Possibly skip selecting hot atoms only and optimize all atoms: if self.refine_hot_only: self.initial_refine_hot(atmsel) # Do simulated annealing MD: if self.md_level: self.md_level(atmsel, actions) # Possibly skip 'HOT CG' after MD: if self.refine_hot_only: self.final_refine_hot(atmsel) # Get a final conjugate gradients refined structure: cg = conjugate_gradients() cg.optimize(atmsel, max_iterations=200, output=self.optimize_output, actions=actions) # Evaluate gross changes between the initial and final refined model: if 'NO_FIT' not in self.fit_in_refine: aln = alignment(self.env) mdl2 = read_model(file='TO_BE_REFINED.TMP') casel = selection(self).only_atom_types('CA') casel.superpose(mdl2, aln) casel = selection(self) casel.superpose(mdl2, aln) modfile.delete('TO_BE_REFINED.TMP')
def read_alignment(self, aln=None): """Read the template-sequence alignment needed for modeling""" if aln is None: aln = alignment(self.env) aln.clear() aln.append(file=self.alnfile, align_codes=self.knowns + [self.sequence]) return aln
def make(self, atmsel, restraint_type, spline_on_site, residue_span_range=(0, 99999), residue_span_sign=True, restraint_sel_atoms=1, basis_pdf_weight='LOCAL', basis_relative_weight=0.05, intersegment=True, dih_lib_only=False, spline_dx=0.5, spline_min_points=5, spline_range=4.0, mnch_lib=1, accessibility_type=8, surftyp=1, distngh=6.0, aln=None, edat=None, io=None): """Calculates and selects new restraints of a specified type""" if not hasattr(atmsel, "get_atom_indices"): raise TypeError("First argument needs to be an atom selection") if not isinstance(restraint_type, str): raise TypeError("restraint_type must be a string") restyp = restraint_type.upper() (inds, mdl) = atmsel.get_atom_indices() if mdl is None: raise ValueError("selection is empty") if mdl is not self.__mdl: raise ValueError("selection refers to a different model") if edat is None: edat = self.__mdl.env.edat if io is None: io = self.__mdl.env.io if aln is None: if restyp in \ ('CHI1_DIHEDRAL', 'CHI2_DIHEDRAL', 'CHI3_DIHEDRAL', 'CHI4_DIHEDRAL', 'PHI_DIHEDRAL', 'PSI_DIHEDRAL', 'OMEGA_DIHEDRAL', 'PHI-PSI_BINORMAL'): raise ValueError("You must provide an alignment for this " + "restraint type") else: aln = alignment.alignment(self.__mdl.env) func = _modeller.mod_restraints_make return func(self.__mdl.modpt, edat.modpt, aln.modpt, io.modpt, self.__mdl.env.libs.modpt, inds, (), (), residue_span_range, restraint_type, restraint_sel_atoms, 0, basis_pdf_weight, 0, 0., basis_relative_weight, spline_on_site, residue_span_sign, intersegment, dih_lib_only, spline_dx, spline_min_points, spline_range, mnch_lib, accessibility_type, (0, 0), (0, 0, 0), surftyp, distngh, 0.0)
def principal_components(env, family, cluster_cut): """Does principal components on family""" aln = alignment(env, file=family + '.ali') mat = family + '.mat' aln.id_table(matrix_file=mat) env.principal_components(matrix_file=mat, file=family + '.dat') env.dendrogram(matrix_file=mat, cluster_cut=cluster_cut)
def fit(env, model, code, model2, code2, alnfile, model2_fit): """Superposes model2 on model, and writes out a file with model2 superposed on model.""" m1 = modelobj(env, file=model) m2 = modelobj(env, file=model2) aln = alignment(env, file=alnfile, align_codes=(code, code2)) atmsel = selection(m1).only_atom_types('CA') atmsel.superpose(m2, aln) m2.write(file=model2_fit)
def dihedrals(atmsel): """Randomize dihedrals""" mdl = atmsel.get_model() aln = alignment(mdl.env, file=mdl.alnfile, align_codes=mdl.knowns+[mdl.sequence]) # Just in case, generate topology again (ROTATE needs bonds) # (could replace with GENERATE_TOPOLOGY if no special patches) mdl.create_topology(aln) # Optimize all dihedral angles: atmsel.rotate_dihedrals(change='RANDOMIZE', deviation=mdl.deviation, dihedrals=('phi', 'psi', 'omega', 'chi1', 'chi2', 'chi3', 'chi4'))
def build_sequence(self, sequence, special_patches=None, patch_default=True, blank_single_chain=True): """Build an extended chain from a string of one-letter residue codes""" a = alignment.alignment(self.env) a.append_sequence(sequence, blank_single_chain) self.clear_topology() self.generate_topology(a[0], patch_default=patch_default, blank_single_chain=blank_single_chain) if special_patches: special_patches(self) self.build(initialize_xyz=True, build_method='INTERNAL_COORDINATES') self.prottyp = 'structure'
def read_xyz(mdl, aln): """Read in the initial structure from an existing file""" # Create two copies of the template sequence: a = alignment(mdl.env) a.append(file=mdl.alnfile, align_codes=[mdl.sequence]*2) # Use the initial model as the first structure in the alignment: a[0].prottyp = 'structureX' a[0].atom_file = a[0].code = mdl.my_inifile mdl.read_top_par() mdl.create_topology(aln) # Get model coordinates from the initial model, making sure that the # sequence is correct and any missing atoms are filled in: mdl.transfer_xyz(a, cluster_cut=-1.0) mdl.build(initialize_xyz=False, build_method='INTERNAL_COORDINATES')
def fit_models_on_template(self): """Superpose each of the generated models on the templates""" aln = alignment(self.env) aln.append(file=self.alnfile, align_codes=self.knowns) self.align_models(aln) # To take care of the '.' in segment specs: aln.write(file='.tmp.ali', alignment_format='PIR') codes = [seq.code for seq in aln] aln.read(file='.tmp.ali', alignment_format='PIR', align_codes=codes) modfile.delete('.tmp.ali') aln.compare_structures(fit=True, output='SHORT', fit_atoms='CA') aln.malign3d(gap_penalties_3d=(0, 3), write_whole_pdb=False, write_fit=True, fit=False, fit_atoms='CA', current_directory=True, edit_file_ext=(self.pdb_ext, '_fit.pdb'), output='SHORT')