コード例 #1
0
def AverageDistanceMatrixFromTraj(t, sele, first=0, last=-1):
    r"""
    Calculate the distance between each pair of atoms in **sele**, averaged
    over the frames ``[first, last)`` of the trajectory **t**.

    :param t: the trajectory
    :param sele: the selection used to determine the atom pairs
    :param first: index of the first frame of t to be used
    :param last: index one past the last frame of t to be used (slice
     semantics); -1 (the default) means "up to the end of the trajectory",
     i.e. ``t.GetFrameCount()``
    :type t: :class:`~ost.mol.CoordGroupHandle`
    :type sele: :class:`~ost.mol.EntityView`
    :type first: :class:`int`
    :type last: :class:`int`

    :return: a symmetric numpy N\ :subscript:`atoms`\ xN\ :subscript:`atoms`
     matrix, where N\ :subscript:`atoms` is the number of atoms in **sele**.
     Entry [i, j] is the mean distance between atoms i and j of **sele**.
    :raises ImportError: if numpy cannot be imported (the error is logged
     first).
    """
    try:
        import numpy as npy
    except ImportError:
        LogError("Function needs numpy, but I could not import it.")
        raise
    # Used directly as a slice end, -1 would silently drop the final frame.
    # Map it to the frame count, as the other trajectory functions do.
    if last == -1:
        last = t.GetFrameCount()
    atoms = sele.atoms
    n_atoms = sele.GetAtomCount()
    M = npy.zeros([n_atoms, n_atoms])
    for i, a1 in enumerate(atoms):
        for j, a2 in enumerate(atoms):
            # Only the lower triangle (including the diagonal) is computed;
            # the result is mirrored into the upper triangle.
            if j > i:
                break
            d = ost.mol.alg.AnalyzeDistanceBetwAtoms(
                t, a1.GetHandle(), a2.GetHandle())[first:last]
            mean_d = npy.mean(d)
            M[i, j] = mean_d
            M[j, i] = mean_d
    return M
コード例 #2
0
def DistanceMatrixFromPairwiseDistances(distances, p=2):
    r"""
    Calculate a frame-to-frame distance matrix
    M(N\ :subscript:`frames`\ xN\ :subscript:`frames`\ ) from the pairwise
    distances matrix D(N\ :subscript:`pairs`\ xN\ :subscript:`frames`\ ),
    where N\ :subscript:`frames` is the number of frames in the trajectory
    and N\ :subscript:`pairs` the number of atom pairs.

    M[i,j] is the distance between frame i and frame j, calculated as a
    p-norm of the differences in distances from the two frames, normalised
    by the number of pairs (distance-RMSD for p=2). The matrix is symmetric
    and its diagonal is zero (a frame has no distance to itself).

    :param distances: a pairwise distance matrix (one row per atom pair, one
     column per frame) as obtained from
     :py:func:`~mol.alg.trajectory_analysis.PairwiseDistancesFromTraj`
    :param p: exponent used for the p-norm.

    :return: a numpy N\ :subscript:`frames`\ xN\ :subscript:`frames` matrix,
     where N\ :subscript:`frames` is the number of frames.
    :raises ImportError: if numpy cannot be imported (the error is logged
     first).
    """
    try:
        import numpy as npy
    except ImportError:
        LogError("Function needs numpy, but I could not import it.")
        raise
    n_pairs = distances.shape[0]
    n_frames = distances.shape[1]
    # Start from zeros: seeding with an identity matrix would report a
    # distance of 1 between every frame and itself.
    dist_mat = npy.zeros((n_frames, n_frames))
    for i in range(n_frames):
        frame_i = distances[:, i]
        # Only the upper triangle is computed; the value is mirrored.
        for j in range(i + 1, n_frames):
            d = (((abs(frame_i - distances[:, j])**p).sum()) /
                 float(n_pairs))**(1. / p)
            dist_mat[i, j] = d
            dist_mat[j, i] = d
    return dist_mat
コード例 #3
0
 def _DoLoad(self):
     """Load every id entered in ``self._line`` and add it to the scene.

     The text of ``self._line`` is split on commas and whitespace into
     individual ids and the line is cleared. Each id is fetched with
     ``RemoteLoad`` from ``self._current_repo``, wrapped in a
     ``gfx.Entity`` named after the id and added to the scene. A failure
     while loading or while adding to the scene is logged and does not
     stop the remaining ids from being processed.
     """
     entered = str(self._line.text())
     # str.split() without arguments never yields empty or padded tokens,
     # so no further stripping/filtering is required.
     split_ids = [
         token for chunk in entered.split(',') for token in chunk.split()
     ]
     self._line.setText('')
     for split_id in split_ids:
         try:
             ent = RemoteLoad(split_id, from_repo=self._current_repo)
         except Exception as load_error:
             LogError(str(load_error))
         else:
             g = gfx.Entity(split_id, ent)
             try:
                 gfx.Scene().Add(g)
             except Exception as add_error:
                 LogError(str(add_error))
コード例 #4
0
 def __CalculateSurface(self,ent_list,name,msms_exe,density,
                        radius,selection,noh,nohet,nowat):
   """Calculate an MSMS surface for each gfx.Entity and add it to the scene.

   Items of **ent_list** that are not ``gfx.Entity`` instances are skipped.
   For every other item, ``msms.CalculateSurface`` is run on its view, and
   the first returned surface is added to the scene under the name
   ``<entity name>_<name>``. The remaining parameters are forwarded
   unchanged to ``msms.CalculateSurface``.

   Processing stops at the first failure: the error is logged and the
   method returns without attempting the remaining entities.
   """
   for entity in ent_list:
     if not isinstance(entity, gfx.Entity):
       continue
     try:
       surfaces=msms.CalculateSurface(entity.view,
                                      msms_exe=msms_exe,
                                      density=density,
                                      radius=radius,
                                      selection=selection,
                                      no_hydrogens=noh,
                                      no_hetatoms=nohet,
                                      no_waters=nowat)
       first_surface=surfaces[0]
       scene=gfx.Scene()
       surface_name="%s_%s"%(entity.GetName(),name)
       scene.Add(gfx.Surface(surface_name,first_surface))
     except (RuntimeError, msms.MsmsProcessError):
       LogError("WARNING: Surface could not be calculated")
       return
     except UserWarning:
       LogError("WARNING: Entry with the same name already present in scene")
       return
コード例 #5
0
def AnalyzeDistanceFluctuationMatrix(t, sele, first=0, last=-1):
    r"""
    Calculate, for each pair of atoms in **sele**, the standard deviation of
    their distance over the frames ``[first, last)`` of the trajectory **t**.

    :param t: the trajectory
    :param sele: the selection used to determine the atom pairs
    :param first: index of the first frame of t to be used
    :param last: index one past the last frame of t to be used (slice
     semantics); -1 (the default) means "up to the end of the trajectory",
     i.e. ``t.GetFrameCount()``

    :return: a symmetric numpy N\ :subscript:`atoms`\ xN\ :subscript:`atoms`
     matrix, where N\ :subscript:`atoms` is the number of atoms in **sele**.
     Entry [i, j] is the standard deviation of the distance between atoms
     i and j of **sele**.
    :raises ImportError: if numpy cannot be imported (the error is logged
     first).
    """
    try:
        import numpy as npy
    except ImportError:
        LogError("Function needs numpy, but I could not import it.")
        raise
    # Used directly as a slice end, -1 would silently drop the final frame.
    # Map it to the frame count, as the other trajectory functions do.
    if last == -1:
        last = t.GetFrameCount()
    atoms = sele.atoms
    n_atoms = sele.GetAtomCount()
    M = npy.zeros([n_atoms, n_atoms])
    for i, a1 in enumerate(atoms):
        for j, a2 in enumerate(atoms):
            # Only the upper triangle (including the diagonal) is computed;
            # the result is mirrored into the lower triangle.
            if i > j:
                continue
            d = ost.mol.alg.AnalyzeDistanceBetwAtoms(
                t, a1.GetHandle(), a2.GetHandle())[first:last]
            std_d = npy.std(d)
            M[j, i] = std_d
            M[i, j] = std_d
    return M
コード例 #6
0
def RMSD_Matrix_From_Traj(t,
                          sele,
                          first=0,
                          last=-1,
                          align=True,
                          align_sele=None):
    """
    This function calculates a matrix M such that M[i,j] is the
    RMSD (calculated on **sele**) between frames i and j of the trajectory
    **t**. If **align** is set, the frames are first superposed on
    **align_sele**, using frame i as the reference for row i.

    :param t: the trajectory
    :param sele: the selection used for the RMSD calculation (and for the
     alignment when **align_sele** is not given)
    :param first: the first frame of t to be used
    :param last: the last frame of t to be used; -1 (the default) is
     replaced by ``t.GetFrameCount()``
    :param align: if true, superpose the frames with
     ``ost.mol.alg.SuperposeFrames`` before calculating each row
    :param align_sele: the selection used for the superposition; falls back
     to **sele** when it is None (or otherwise evaluates to false)
    :type t: :class:`~ost.mol.CoordGroupHandle`
    :type sele: :class:`~ost.mol.EntityView`
    :type first: :class:`int`
    :type last: :class:`int`
    :type align: :class:`bool`
    :type align_sele: :class:`~ost.mol.EntityView`

    :return: Returns a numpy N\ :subscript:`frames`\ xN\ :subscript:`frames` matrix,
     where N\ :subscript:`frames` is the number of frames.
    :raises ImportError: if numpy cannot be imported (the error is logged
     first).
    """
    # Without an explicit alignment selection, align on the RMSD selection.
    if not align_sele: align_sele = sele
    try:
        import numpy as npy
        # -1 means "up to the end of the trajectory".
        if last == -1: last = t.GetFrameCount()
        n_frames = last - first
        # Every row is overwritten in the loop below, so the identity values
        # only serve as initial allocation.
        rmsd_matrix = npy.identity(n_frames)
        for i in range(n_frames):
            if align:
                # t is rebound to the superposed trajectory, so all later
                # iterations operate on the result of the previous call.
                t = ost.mol.alg.SuperposeFrames(t,
                                                align_sele,
                                                begin=first,
                                                end=last,
                                                ref=i)
                # NOTE(review): eh is never used in this function.
                eh = t.GetEntity()
            # Presumably copies the coordinates of frame i into the entity so
            # that sele acts as the reference for AnalyzeRMSD - TODO confirm.
            t.CopyFrame(i)
            rmsd_matrix[i, :] = ost.mol.alg.AnalyzeRMSD(t, sele, sele)
            if i == 0:
                # Rebase the frame window to start at 0 after the first pass.
                # NOTE(review): presumably SuperposeFrames returns only the
                # frames [first, last), which makes the rebasing necessary -
                # confirm. The rebasing also happens when align is False, in
                # which case t has not been replaced.
                last = last - first
                first = 0
        return rmsd_matrix
    except ImportError:
        LogError("Function needs numpy, but I could not import it.")
        raise
コード例 #7
0
def PairwiseDistancesFromTraj(t, sele, first=0, last=-1, seq_sep=1):
    r"""
    Collect the per-frame distances of atom pairs of **sele** from the
    trajectory **t**.

    A pair consists of the atoms at positions i and j of ``sele.atoms`` with
    ``j - i >= seq_sep``. The result holds one row per such pair (in the
    order produced by iterating i, then j) and one column per frame in
    ``[first, last)``.

    :param t: the trajectory
    :param sele: the selection used to determine the atom pairs
    :param first: the first frame of t to be used
    :param last: the last frame of t to be used; -1 (the default) is
     replaced by ``t.GetFrameCount()``
    :param seq_sep: the minimal separation (``j - i``) between the positions
     of the two atoms of a pair
    :type t: :class:`~ost.mol.CoordGroupHandle`
    :type sele: :class:`~ost.mol.EntityView`
    :type first: :class:`int`
    :type last: :class:`int`
    :type seq_sep: :class:`int`

    :return: a numpy N\ :subscript:`pairs`\ xN\ :subscript:`frames` matrix.
    :raises ImportError: if numpy cannot be imported (the error is logged
     first).
    """
    try:
        import numpy as npy
        end = t.GetFrameCount() if last == -1 else last
        n_frames = end - first
        atoms = sele.atoms
        # Enumerate the qualifying pairs once; the list gives both the row
        # count and the row order of the result.
        pairs = [(atom_a, atom_b)
                 for i, atom_a in enumerate(atoms)
                 for j, atom_b in enumerate(atoms)
                 if j - i >= seq_sep]
        n_var = len(pairs)
        dist_matrix = npy.zeros(n_frames * n_var).reshape(n_var, n_frames)
        for row, (atom_a, atom_b) in enumerate(pairs):
            per_frame = ost.mol.alg.AnalyzeDistanceBetwAtoms(
                t, atom_a.GetHandle(), atom_b.GetHandle())
            dist_matrix[row] = per_frame[first:end]
        return dist_matrix
    except ImportError:
        LogError("Function needs numpy, but I could not import it.")
        raise
コード例 #8
0
def DistRMSDFromTraj(t,
                     sele,
                     ref_sele,
                     radius=7.0,
                     average=False,
                     seq_sep=4,
                     first=0,
                     last=-1):
    r"""
    Calculate the distance RMSD for each frame of a trajectory.

    The atom pairs entering the calculation are chosen on **ref_sele**: a
    pair is used if its distance in **ref_sele** is smaller than **radius**
    and, when both atoms belong to the same chain, their residue numbers
    differ by at least **seq_sep**. Pairs spanning two chains are not
    subject to the sequence separation filter. The corresponding atoms of
    **sele** (same positions in the atom list) are then followed through
    the trajectory, so the number and order of atoms in **ref_sele** and
    **sele** should be the same.

    :param t: the trajectory
    :param sele: the selection used to calculate the distance RMSD
    :param ref_sele: the reference selection used to determine the atom
     pairs and reference distances
    :param radius: the upper limit of distances in ref_sele considered for
     the calculation
    :param average: use the average distance in the trajectory as reference
     instead of the distance obtained from ref_sele
    :param seq_sep: the minimal sequence separation between atom pairs
     considered for the calculation
    :param first: the first frame of t to be used
    :param last: the last frame of t to be used; -1 (the default) is
     replaced by ``t.GetFrameCount()``

    :type t: :class:`~ost.mol.CoordGroupHandle`
    :type sele: :class:`~ost.mol.EntityView`
    :type ref_sele: :class:`~ost.mol.EntityView`
    :type radius: :class:`float`
    :type average: :class:`bool`
    :type seq_sep: :class:`int`
    :type first: :class:`int`
    :type last: :class:`int`

    :return: a numpy vector dist_rmsd(N\ :subscript:`frames`), or None
     (after printing a message) if the two selections do not contain the
     same number of atoms.
    :raises ImportError: if numpy cannot be imported (the error is logged
     first).
    """
    if sele.GetAtomCount() != ref_sele.GetAtomCount():
        print('Not same number of atoms in the two views')
        return
    try:
        import numpy as npy
        end = t.GetFrameCount() if last == -1 else last
        dist_rmsd = npy.zeros(end - first)
        pair_count = 0.0
        for i, ref_a in enumerate(ref_sele.atoms):
            for j, ref_b in enumerate(ref_sele.atoms):
                # Visit each unordered pair exactly once.
                if j <= i:
                    continue
                res_a = ref_a.GetResidue()
                chain_a = res_a.GetChain()
                res_b = ref_b.GetResidue()
                chain_b = res_b.GetChain()
                # The sequence separation only applies within one chain.
                if chain_a == chain_b:
                    separation = abs(res_b.GetNumber().num -
                                     res_a.GetNumber().num)
                    if separation < seq_sep:
                        continue
                reference = ost.geom.Distance(ref_a.pos, ref_b.pos)
                if not reference < radius:
                    continue
                traj_a = sele.atoms[i]
                traj_b = sele.atoms[j]
                d_traj = ost.mol.alg.AnalyzeDistanceBetwAtoms(
                    t, traj_a.GetHandle(), traj_b.GetHandle())[first:end]
                if average:
                    reference = npy.mean(d_traj)
                # Accumulate the squared deviation frame by frame.
                for frame, value in enumerate(d_traj):
                    dist_rmsd[frame] += (value - reference)**2.0
                pair_count += 1.0
        return (dist_rmsd / float(pair_count))**0.5
    except ImportError:
        LogError("Function needs numpy, but I could not import it.")
        raise
コード例 #9
0
ファイル: clustalw.py プロジェクト: sailfish009/openstructure
def ClustalW(seq1,
             seq2=None,
             clustalw=None,
             keep_files=False,
             nopgap=False,
             clustalw_option_string=False):
    '''
    Runs a ClustalW multiple sequence alignment. The results are returned as a
    :class:`~ost.seq.AlignmentHandle` instance.

    There are two ways to use this function:

     - align exactly two sequences:

        :param seq1: sequence_one
        :type seq1: :class:`~ost.seq.SequenceHandle` or :class:`str`

        :param seq2: sequence_two
        :type seq2: :class:`~ost.seq.SequenceHandle` or :class:`str`

        The two sequences can be specified as two separate function parameters
        (`seq1`, `seq2`). The type of both parameters can be either
        :class:`~ost.seq.SequenceHandle` or :class:`str`, but must be the same
        for both parameters.

     - align two or more sequences:

        :param seq1: sequence_list
        :type seq1: :class:`~ost.seq.SequenceList`

        :param seq2: must be :class:`None`

        Two or more sequences can be specified by using a
        :class:`~ost.seq.SequenceList`. It is then passed as the first function
        parameter (`seq1`). The second parameter (`seq2`) must be :class:`None`.


    :param clustalw: path to ClustalW executable (used in :func:`~ost.settings.Locate`)
    :type clustalw: :class:`str`
    :param nopgap: turn residue-specific gaps off
    :type nopgap: :class:`bool`
    :param clustalw_option_string: additional ClustalW flags (see
     http://www.clustal.org/download/clustalw_help.txt). The string is split
     into individual arguments using shell-like quoting rules; it is ignored
     when empty or left at its default of False.
    :type clustalw_option_string: :class:`str`
    :param keep_files: do not delete temporary files
    :type keep_files: :class:`bool`

    :return: the alignment, or None (after logging an error) if the input
     sequences are not given in one of the two supported forms.
    :raises ValueError: if the sequence names are not unique.

    .. note ::

      - In the passed sequences ClustalW will convert lowercase to uppercase,
        and change all '.' to '-'. OST will convert '?' to 'X' before aligning
        sequences with ClustalW.
      - If a :attr:`sequence name <ost.seq.SequenceHandle.name>` contains
        spaces, only the part before the space is considered as sequence name.
        To avoid surprises, you should remove spaces from the sequence name.
      - Sequence names must be unique (:class:`ValueError` exception raised
        otherwise).

    ClustalW will accept only IUB/IUPAC amino acid and nucleic acid codes:

    ======= ======================= ======= ============================
    Residue  Name                   Residue  Name
    ======= ======================= ======= ============================
       A    alanine                    P    proline
       B    aspartate or asparagine    Q    glutamine
       C    cysteine                   R    arginine
       D    aspartate                  S    serine
       E    glutamate                  T    threonine
       F    phenylalanine              U    selenocysteine
       G    glycine                    V    valine
       H    histidine                  W    tryptophan
       I    isoleucine                 Y    tyrosine
       K    lysine                     Z    glutamate or glutamine
       L    leucine                    X    any
       M    methionine                 \\*   translation stop
       N    asparagine                 \\-   gap of indeterminate length
    ======= ======================= ======= ============================

    '''
    # Local import: only needed to tokenise the extra option string.
    import shlex

    clustalw_path = settings.Locate(('clustalw', 'clustalw2'),
                                    explicit_file_name=clustalw)

    if seq2 is not None:
        if isinstance(seq1, seq.SequenceHandle) and isinstance(
                seq2, seq.SequenceHandle):
            seq_list = seq.CreateSequenceList()
            seq_list.AddSequence(seq1)
            seq_list.AddSequence(seq2)
        elif isinstance(seq1, str) and isinstance(seq2, str):
            seqh1 = seq.CreateSequence("seq1", seq1)
            seqh2 = seq.CreateSequence("seq2", seq2)
            seq_list = seq.CreateSequenceList()
            seq_list.AddSequence(seqh1)
            seq_list.AddSequence(seqh2)
        else:
            LogError("WARNING: Specify at least two Sequences")
            return
    elif isinstance(seq1, seq.SequenceList):
        seq_list = seq1
    else:
        LogError(
            "WARNING: Specify either two SequenceHandles or one SequenceList")
        return

    sequence_names = set()
    for s in seq_list:
        # we cut out anything after a space to be consistent with ClustalW behaviour
        sequence_names.add(s.GetName().split(' ')[0])
    if len(sequence_names) < len(seq_list):
        raise ValueError(
            "ClustalW can only process sequences with unique identifiers!")

    # Work on copies so the caller's sequences are not modified when
    # replacing '?' by 'X'.
    new_list = seq.CreateSequenceList()
    for s in seq_list:
        ss = s.Copy()
        for i, c in enumerate(ss):
            if c == '?':
                ss[i] = 'X'
        new_list.AddSequence(ss)

    seq_list = new_list

    temp_dir = utils.TempDirWithFiles((seq_list, ))
    try:
        out = os.path.join(temp_dir.dirname, 'out.fasta')
        # Build an argument list instead of a shell string, so paths
        # containing spaces or shell metacharacters are passed verbatim.
        command = [
            clustalw_path,
            '-infile=%s' % temp_dir.files[0],
            '-output=fasta',
            '-outfile=%s' % out,
        ]
        if nopgap:
            command.append('-nopgap')
        if clustalw_option_string:
            # see useful flags: http://toolkit.tuebingen.mpg.de/clustalw/help_params
            command.extend(shlex.split(clustalw_option_string))

        subprocess.run(command, stdout=subprocess.DEVNULL)

        aln = io.LoadAlignment(out)

        for sequence in seq_list:
            full_name = sequence.GetName()
            # ClustalW keeps only the part of the name before the first
            # space, so accept either form when looking up the sequence.
            short_name = full_name.split(' ')[0]
            for seq_num, aln_seq in enumerate(aln.sequences):
                if aln_seq.GetName() in (full_name, short_name):
                    break
            else:
                # Do not apply offset/view to an unrelated sequence.
                LogError("WARNING: Sequence '%s' not found in ClustalW "
                         "output, offset and view not transferred" %
                         full_name)
                continue
            aln.SetSequenceOffset(seq_num, sequence.offset)
            if sequence.HasAttachedView():
                aln.AttachView(seq_num, sequence.GetAttachedView().Copy())
    finally:
        # Remove the temporary files even if ClustalW or the alignment
        # loading failed.
        if not keep_files:
            temp_dir.Cleanup()

    return aln