def AverageDistanceMatrixFromTraj(t, sele, first=0, last=-1):
    r"""
    Calculate the distance between each pair of atoms in **sele**, averaged
    over the trajectory **t**.

    :param t: the trajectory
    :param sele: the selection used to determine the atom pairs
    :param first: the first frame of t to be used
    :param last: the end of the frame range (used as the exclusive upper
                 bound of a slice); -1, the default, stands for the number
                 of frames in **t**, i.e. all frames from **first** onwards
                 are used
    :type t: :class:`~ost.mol.CoordGroupHandle`
    :type sele: :class:`~ost.mol.EntityView`
    :type first: :class:`int`
    :type last: :class:`int`

    :return: a symmetric numpy
             N\ :subscript:`atoms`\ xN\ :subscript:`atoms` matrix, where
             N\ :subscript:`atoms` is the number of atoms in **sele** and
             element [i,j] is the mean distance between atoms i and j.
    :raises ImportError: if numpy cannot be imported (the error is logged
                         and re-raised)
    """
    try:
        import numpy as npy
    except ImportError:
        LogError("Function needs numpy, but I could not import it.")
        raise
    # -1 is the "up to the end" sentinel, as in the other trajectory
    # functions of this file. Used directly as a slice bound it would
    # silently drop the last frame.
    if last == -1:
        last = t.GetFrameCount()
    n_atoms = sele.GetAtomCount()
    # Fetch the handles once instead of re-reading sele.atoms per pair.
    handles = [atom.GetHandle() for atom in sele.atoms]
    M = npy.zeros([n_atoms, n_atoms])
    for i, h1 in enumerate(handles):
        # Only the lower triangle (including the diagonal) is computed; the
        # result is mirrored into the upper triangle.
        for j in range(i + 1):
            d = ost.mol.alg.AnalyzeDistanceBetwAtoms(
                t, h1, handles[j])[first:last]
            mean_d = npy.mean(d)
            M[i, j] = mean_d
            M[j, i] = mean_d
    return M
def DistanceMatrixFromPairwiseDistances(distances, p=2):
    r"""
    Calculate a distance matrix
    M(N\ :subscript:`frames`\ xN\ :subscript:`frames`\ ) from the pairwise
    distances matrix D(N\ :subscript:`pairs`\ xN\ :subscript:`frames`\ ),
    where N\ :subscript:`frames` is the number of frames in the trajectory
    and N\ :subscript:`pairs` the number of atom pairs.
    M[i,j] is the distance between frame i and frame j calculated as a
    p-norm of the differences in distances from the two frames, normalised
    by the number of pairs (distance-RMSD for p=2). The diagonal is 0.

    :param distances: a pairwise distance matrix as obtained from
      :py:func:`~mol.alg.trajectory_analysis.PairwiseDistancesFromTraj`
      (one row per atom pair, one column per frame)
    :param p: exponent used for the p-norm.

    :return: a symmetric numpy
             N\ :subscript:`frames`\ xN\ :subscript:`frames` matrix, where
             N\ :subscript:`frames` is the number of frames.
    :raises ImportError: if numpy cannot be imported (the error is logged
                         and re-raised)
    """
    try:
        import numpy as npy
    except ImportError:
        LogError("Function needs numpy, but I could not import it.")
        raise
    n_pairs = distances.shape[0]
    n_frames = distances.shape[1]
    # Start from zeros, not identity: the distance of a frame to itself is 0.
    dist_mat = npy.zeros((n_frames, n_frames))
    for i in range(n_frames - 1):
        # Differences between frame i and all later frames, one column per
        # later frame; the column-wise p-norm gives the upper-triangle row.
        diff = abs(distances[:, i + 1:] - distances[:, i:i + 1])
        row = ((diff**p).sum(axis=0) / float(n_pairs))**(1. / p)
        dist_mat[i, i + 1:] = row
        dist_mat[i + 1:, i] = row
    return dist_mat
def _DoLoad(self):
    """
    Load every id currently entered in ``self._line`` and add it to the
    scene.

    The text is split on commas and whitespace, the input is cleared, and
    each id is fetched with ``RemoteLoad`` from ``self._current_repo``.
    Ids that fail to load are logged and skipped; failures while adding to
    the scene are logged as well.
    """
    entered_text = str(self._line.text())
    # str.split() without arguments never yields empty or padded tokens, so
    # no further stripping or filtering is required.
    requested_ids = [token
                     for chunk in entered_text.split(',')
                     for token in chunk.split()]
    self._line.setText('')
    for structure_id in requested_ids:
        try:
            loaded = RemoteLoad(structure_id, from_repo=self._current_repo)
        except Exception as load_error:
            LogError(str(load_error))
            continue
        gfx_entity = gfx.Entity(structure_id, loaded)
        try:
            gfx.Scene().Add(gfx_entity)
        except Exception as scene_error:
            LogError(str(scene_error))
def __CalculateSurface(self, ent_list, name, msms_exe, density,
                       radius, selection, noh, nohet, nowat):
    """
    Compute an MSMS surface for every ``gfx.Entity`` in **ent_list** and add
    it to the scene as ``<entity name>_<name>``.

    Items that are not ``gfx.Entity`` instances are ignored. Processing
    stops at the first failure: a ``RuntimeError`` or
    ``msms.MsmsProcessError`` (surface calculation) or a ``UserWarning``
    is logged and the method returns without handling the remaining items.
    """
    msms_options = dict(msms_exe=msms_exe,
                        density=density,
                        radius=radius,
                        selection=selection,
                        no_hydrogens=noh,
                        no_hetatoms=nohet,
                        no_waters=nowat)
    for candidate in ent_list:
        if not isinstance(candidate, gfx.Entity):
            continue
        try:
            surfaces = msms.CalculateSurface(candidate.view, **msms_options)
            # Only the first surface returned is displayed.
            first_surface = surfaces[0]
            gfx.Scene().Add(
                gfx.Surface("%s_%s" % (candidate.GetName(), name),
                            first_surface))
        except (RuntimeError, msms.MsmsProcessError):
            LogError("WARNING: Surface could not be calculated")
            return
        except UserWarning:
            LogError("WARNING: Entry with the same name already present in scene")
            return
def AnalyzeDistanceFluctuationMatrix(t, sele, first=0, last=-1):
    r"""
    Calculate, for each pair of atoms in **sele**, the standard deviation of
    their distance over the trajectory **t**.

    :param t: the trajectory
    :param sele: the selection used to determine the atom pairs
    :param first: the first frame of t to be used
    :param last: the end of the frame range (used as the exclusive upper
                 bound of a slice); -1, the default, stands for the number
                 of frames in **t**, i.e. all frames from **first** onwards
                 are used

    :return: a symmetric numpy
             N\ :subscript:`atoms`\ xN\ :subscript:`atoms` matrix, where
             N\ :subscript:`atoms` is the number of atoms in **sele** and
             element [i,j] is the standard deviation of the distance
             between atoms i and j.
    :raises ImportError: if numpy cannot be imported (the error is logged
                         and re-raised)
    """
    try:
        import numpy as npy
    except ImportError:
        LogError("Function needs numpy, but I could not import it.")
        raise
    # -1 is the "up to the end" sentinel, as in the other trajectory
    # functions of this file. Used directly as a slice bound it would
    # silently drop the last frame.
    if last == -1:
        last = t.GetFrameCount()
    n_atoms = sele.GetAtomCount()
    # Fetch the handles once instead of re-reading sele.atoms per pair.
    handles = [atom.GetHandle() for atom in sele.atoms]
    M = npy.zeros([n_atoms, n_atoms])
    for i, h1 in enumerate(handles):
        # Only the upper triangle (including the diagonal) is computed; the
        # result is mirrored into the lower triangle.
        for j in range(i, len(handles)):
            d = ost.mol.alg.AnalyzeDistanceBetwAtoms(
                t, h1, handles[j])[first:last]
            std_d = npy.std(d)
            M[j, i] = std_d
            M[i, j] = std_d
    return M
def RMSD_Matrix_From_Traj(t, sele, first=0, last=-1, align=True, align_sele=None):
    r"""
    This function calculates a matrix M such that M[i,j] is the RMSD
    (calculated on **sele**) between frames i and j of the trajectory **t**,
    optionally after superposing the frames on **align_sele**.

    :param t: the trajectory
    :param sele: the selection used for the RMSD calculation (and for the
                 alignment when **align_sele** is not given)
    :param first: the first frame of t to be used
    :param last: the last frame of t to be used; -1 (the default) is
                 replaced by ``t.GetFrameCount()``
    :param align: if true, the frames are superposed with
                  ``ost.mol.alg.SuperposeFrames`` before each row of the
                  matrix is calculated
    :param align_sele: the selection used for the superposition; when it is
                       ``None`` (or otherwise falsy) **sele** is used
    :type t: :class:`~ost.mol.CoordGroupHandle`
    :type sele: :class:`~ost.mol.EntityView`
    :type first: :class:`int`
    :type last: :class:`int`

    :return: Returns a numpy N\ :subscript:`frames`\ xN\ :subscript:`frames`
             matrix, where N\ :subscript:`frames` is ``last - first``.
    :raises ImportError: if numpy cannot be imported (the error is logged
                         and re-raised)
    """
    if not align_sele:
        align_sele = sele
    try:
        import numpy as npy
        if last == -1:
            last = t.GetFrameCount()
        n_frames = last - first
        # The identity is only an initial value: every row is overwritten
        # in the loop below.
        rmsd_matrix = npy.identity(n_frames)
        for i in range(n_frames):
            if align:
                # t is rebound to the trajectory returned by SuperposeFrames,
                # superposed on reference frame i.
                t = ost.mol.alg.SuperposeFrames(t,
                                                align_sele,
                                                begin=first,
                                                end=last,
                                                ref=i)
                # NOTE(review): eh is not used afterwards in this function.
                eh = t.GetEntity()
            # NOTE(review): presumably makes frame i the reference
            # coordinates for the AnalyzeRMSD call below -- confirm against
            # the CoordGroupHandle documentation.
            t.CopyFrame(i)
            rmsd_matrix[i, :] = ost.mol.alg.AnalyzeRMSD(t, sele, sele)
            if i == 0:
                # After the first pass the frame range is rebased to start
                # at 0.
                # NOTE(review): this looks like it assumes the trajectory
                # returned by SuperposeFrames holds only frames first..last;
                # with align=False t is never replaced, so verify that the
                # row length still matches n_frames when first/last do not
                # span the whole trajectory.
                last = last - first
                first = 0
        return rmsd_matrix
    except ImportError:
        LogError("Function needs numpy, but I could not import it.")
        raise
def PairwiseDistancesFromTraj(t, sele, first=0, last=-1, seq_sep=1):
    r"""
    Calculate the distances between pairs of atoms in **sele** from a
    trajectory **t**. A pair (i, j) is used when atom j comes at least
    **seq_sep** positions after atom i in ``sele.atoms``.
    It returns a matrix containing one row for each atom pair and
    N\ :subscript:`frames` columns, where N\ :subscript:`frames` is the
    number of frames used (``last - first``).

    :param t: the trajectory
    :param sele: the selection used to determine the atom pairs
    :param first: the first frame of t to be used
    :param last: the last frame of t to be used; -1 (the default) is
                 replaced by ``t.GetFrameCount()``
    :param seq_sep: The minimal separation (in positions within
                    ``sele.atoms``) between the atoms of a pair
    :type t: :class:`~ost.mol.CoordGroupHandle`
    :type sele: :class:`~ost.mol.EntityView`
    :type first: :class:`int`
    :type last: :class:`int`
    :type seq_sep: :class:`int`

    :return: a numpy N\ :subscript:`pairs`\ xN\ :subscript:`frames` matrix.
    :raises ImportError: if numpy cannot be imported (the error is logged
                         and re-raised)
    """
    try:
        import numpy as npy
    except ImportError:
        LogError("Function needs numpy, but I could not import it.")
        raise
    if last == -1:
        last = t.GetFrameCount()
    n_frames = last - first
    # Fetch the handles once instead of re-reading sele.atoms per pair.
    handles = [atom.GetHandle() for atom in sele.atoms]
    n_atoms = len(handles)
    # Selecting the pairs once gives both the row count and the fill order
    # (i ascending, then j ascending), so no separate counting pass is
    # needed.
    pairs = [(i, j)
             for i in range(n_atoms)
             for j in range(n_atoms)
             if j - i >= seq_sep]
    dist_matrix = npy.zeros((len(pairs), n_frames))
    for row, (i, j) in enumerate(pairs):
        dist_matrix[row] = ost.mol.alg.AnalyzeDistanceBetwAtoms(
            t, handles[i], handles[j])[first:last]
    return dist_matrix
def DistRMSDFromTraj(t, sele, ref_sele, radius=7.0, average=False, seq_sep=4, first=0, last=-1):
    r"""
    Calculate the distance RMSD along a trajectory.

    Atom pairs are picked from **ref_sele**: a pair is used when its
    distance in **ref_sele** is smaller than **radius**, unless both atoms
    are in the same chain and their residue numbers differ by less than
    **seq_sep**. The corresponding atoms of **sele** (same index) are then
    followed through the trajectory. The number and order of atoms in
    **ref_sele** and **sele** should be the same.

    :param t: the trajectory
    :param sele: the selection used to calculate the distance RMSD
    :param ref_sele: the reference selection used to determine the atom
                     pairs and reference distances
    :param radius: the upper limit of distances in ref_sele considered for
                   the calculation
    :param seq_sep: the minimal sequence separation between atom pairs
                    considered for the calculation
    :param average: use the average distance in the trajectory as reference
                    instead of the distance obtained from ref_sele
    :param first: the first frame of t to be used
    :param last: the last frame of t to be used; -1 (the default) is
                 replaced by ``t.GetFrameCount()``
    :type t: :class:`~ost.mol.CoordGroupHandle`
    :type sele: :class:`~ost.mol.EntityView`
    :type ref_sele: :class:`~ost.mol.EntityView`
    :type radius: :class:`float`
    :type average: :class:`bool`
    :type first: :class:`int`
    :type last: :class:`int`
    :type seq_sep: :class:`int`

    :return: a numpy vector dist_rmsd(N\ :subscript:`frames`), or ``None``
             (after printing a message) when the two selections do not
             have the same number of atoms.
    """
    if sele.GetAtomCount() != ref_sele.GetAtomCount():
        print('Not same number of atoms in the two views')
        return
    try:
        import numpy as npy
        if last == -1:
            last = t.GetFrameCount()
        n_frames = last - first
        squared_dev_sum = npy.zeros(n_frames)
        n_pairs = 0.0
        for i, ref_a in enumerate(ref_sele.atoms):
            for j, ref_b in enumerate(ref_sele.atoms):
                # Each unordered pair is visited once (j > i).
                if j <= i:
                    continue
                res_a = ref_a.GetResidue()
                chain_a = res_a.GetChain()
                res_b = ref_b.GetResidue()
                chain_b = res_b.GetChain()
                # Skip pairs that are close in sequence within one chain.
                if chain_a == chain_b and abs(
                        res_b.GetNumber().num - res_a.GetNumber().num) < seq_sep:
                    continue
                target = ost.geom.Distance(ref_a.pos, ref_b.pos)
                if not target < radius:
                    continue
                atom_a = sele.atoms[i]
                atom_b = sele.atoms[j]
                frame_distances = ost.mol.alg.AnalyzeDistanceBetwAtoms(
                    t, atom_a.GetHandle(), atom_b.GetHandle())[first:last]
                if average:
                    # Deviations are measured from the trajectory mean
                    # instead of the reference distance.
                    target = npy.mean(frame_distances)
                for frame, value in enumerate(frame_distances):
                    squared_dev_sum[frame] += (value - target)**2.0
                n_pairs += 1.0
        return (squared_dev_sum / n_pairs)**0.5
    except ImportError:
        LogError("Function needs numpy, but I could not import it.")
        raise
def ClustalW(seq1, seq2=None, clustalw=None, keep_files=False, nopgap=False,
             clustalw_option_string=False):
    '''
    Runs a ClustalW multiple sequence alignment. The results are returned as a
    :class:`~ost.seq.AlignmentHandle` instance.

    There are two ways to use this function:

     - align exactly two sequences:

        :param seq1: sequence_one
        :type seq1: :class:`~ost.seq.SequenceHandle` or :class:`str`

        :param seq2: sequence_two
        :type seq2: :class:`~ost.seq.SequenceHandle` or :class:`str`

        The two sequences can be specified as two separate function parameters
        (`seq1`, `seq2`). The type of both parameters can be either
        :class:`~ost.seq.SequenceHandle` or :class:`str`, but must be the same
        for both parameters.

     - align two or more sequences:

        :param seq1: sequence_list
        :type seq1: :class:`~ost.seq.SequenceList`

        :param seq2: must be :class:`None`

        Two or more sequences can be specified by using a
        :class:`~ost.seq.SequenceList`. It is then passed as the first
        function parameter (`seq1`). The second parameter (`seq2`) must be
        :class:`None`.

    :param clustalw: path to ClustalW executable (used in
                     :func:`~ost.settings.Locate`)
    :type clustalw: :class:`str`
    :param nopgap: turn residue-specific gaps off
    :type nopgap: :class:`bool`
    :param clustalw_option_string: additional ClustalW flags (see
                                   http://www.clustal.org/download/clustalw_help.txt)
    :type clustalw_option_string: :class:`str`
    :param keep_files: do not delete temporary files
    :type keep_files: :class:`bool`

    :return: the alignment, or :class:`None` (after logging an error) if the
             sequence arguments have an unsupported combination of types
    :raises ValueError: if the sequence names are not unique

    .. note ::

       - In the passed sequences ClustalW will convert lowercase to
         uppercase, and change all '.' to '-'. OST will convert '?' to 'X'
         before aligning sequences with ClustalW.
       - If a :attr:`sequence name <ost.seq.SequenceHandle.name>` contains
         spaces, only the part before the space is considered as sequence
         name. To avoid surprises, you should remove spaces from the
         sequence name.
       - Sequence names must be unique (:class:`ValueError` exception raised
         otherwise).

    ClustalW will accept only IUB/IUPAC amino acid and nucleic acid codes:

    ======= ======================= ======= ============================
    Residue Name                    Residue Name
    ======= ======================= ======= ============================
    A       alanine                 P       proline
    B       aspartate or asparagine Q       glutamine
    C       cystine                 R       arginine
    D       aspartate               S       serine
    E       glutamate               T       threonine
    F       phenylalanine           U       selenocysteine
    G       glycine                 V       valine
    H       histidine               W       tryptophan
    I       isoleucine              Y       tyrosine
    K       lysine                  Z       glutamate or glutamine
    L       leucine                 X       any
    M       methionine              \\*      translation stop
    N       asparagine              \\-      gap of indeterminate length
    ======= ======================= ======= ============================
    '''
    clustalw_path = settings.Locate(('clustalw', 'clustalw2'),
                                    explicit_file_name=clustalw)

    # Identity test: "!=" on a wrapped sequence object would go through its
    # comparison operator instead of simply checking for "not given".
    if seq2 is not None:
        if isinstance(seq1, seq.SequenceHandle) and isinstance(
                seq2, seq.SequenceHandle):
            seq_list = seq.CreateSequenceList()
            seq_list.AddSequence(seq1)
            seq_list.AddSequence(seq2)
        elif isinstance(seq1, str) and isinstance(seq2, str):
            seqh1 = seq.CreateSequence("seq1", seq1)
            seqh2 = seq.CreateSequence("seq2", seq2)
            seq_list = seq.CreateSequenceList()
            seq_list.AddSequence(seqh1)
            seq_list.AddSequence(seqh2)
        else:
            LogError("WARNING: Specify at least two Sequences")
            return
    elif isinstance(seq1, seq.SequenceList):
        seq_list = seq1
    else:
        LogError(
            "WARNING: Specify either two SequenceHandles or one SequenceList")
        return

    sequence_names = set()
    for s in seq_list:
        # we cut out anything after a space to be consistent with ClustalW
        # behaviour
        sequence_names.add(s.GetName().split(' ')[0])
    if len(sequence_names) < len(seq_list):
        raise ValueError(
            "ClustalW can only process sequences with unique identifiers!")

    # Work on copies so the '?' -> 'X' replacement does not modify the
    # caller's sequences.
    new_list = seq.CreateSequenceList()
    for s in seq_list:
        ss = s.Copy()
        for i, c in enumerate(ss):
            if c == '?':
                ss[i] = 'X'
        new_list.AddSequence(ss)
    seq_list = new_list

    temp_dir = utils.TempDirWithFiles((seq_list, ))
    try:
        out = os.path.join(temp_dir.dirname, 'out.fasta')
        command = '%s -infile="%s" -output=fasta -outfile="%s"' % (
            clustalw_path, temp_dir.files[0], out)
        if nopgap:
            command += " -nopgap"
        # Truthiness check: the default (False), None and '' all mean
        # "no extra flags".
        if clustalw_option_string:
            command = command + " " + clustalw_option_string
        # see useful flags:
        # http://toolkit.tuebingen.mpg.de/clustalw/help_params

        # NOTE: the command is run through the shell, so
        # clustalw_option_string must come from a trusted source.
        subprocess.run(command, shell=True, stdout=subprocess.DEVNULL)

        aln = io.LoadAlignment(out)

        for sequence in seq_list:
            # Names are compared up to the first space, the same rule as the
            # uniqueness check above, so sequences whose names contain
            # spaces are still found.
            short_name = sequence.GetName().split(' ')[0]
            for seq_num, aln_seq in enumerate(aln.sequences):
                if aln_seq.GetName().split(' ')[0] == short_name:
                    break
            else:
                # No matching row: skip instead of attaching offset/view to
                # whichever row the loop happened to end on.
                LogError("WARNING: Sequence '%s' not found in ClustalW "
                         "output, offset and view not transferred" %
                         sequence.GetName())
                continue
            aln.SetSequenceOffset(seq_num, sequence.offset)
            if sequence.HasAttachedView():
                aln.AttachView(seq_num, sequence.GetAttachedView().Copy())
        return aln
    finally:
        # Clean up even when ClustalW or the alignment loading fails.
        if not keep_files:
            temp_dir.Cleanup()