Ejemplo n.º 1
0
 def testLogMultipleMessages(self):
   """Check that logging a non-string does not break subsequent log calls.

   A string, an integer and another string are logged in sequence; after
   each call the capturing sink must hold exactly that message followed by
   a newline.
   """
   # observation: converting non-strings in logging can break following calls
   ls = _CapturingLogSink()
   ost.PushVerbosityLevel(1)
   ost.PushLogSink(ls)
   try:
     ost.LogWarning('foo')
     self.assertEqual(ls.message, 'foo\n')
     ost.LogWarning(1)
     self.assertEqual(ls.message, '1\n')
     ost.LogWarning('bar')
     self.assertEqual(ls.message, 'bar\n')
   finally:
     # Undo both pushes even when an assertion fails, so the sink and the
     # verbosity level of this test do not leak into other tests.
     ost.PopLogSink()
     ost.PopVerbosityLevel()
Ejemplo n.º 2
0
 def testLogMessage(self):
   """Check message text and severity delivered to a log sink.

   An error message must arrive with severity 0, a warning with severity 1.
   Multiple (non-string) arguments to a log call are expected to be joined
   by single spaces and terminated with a newline.
   """
   ls = _CapturingLogSink()
   ost.PushVerbosityLevel(1)
   ost.PushLogSink(ls)
   try:
     ost.LogError('error message')
     self.assertEqual(ls.message, 'error message\n')
     self.assertEqual(ls.severity, 0)
     ost.LogWarning(1, 2, 3)
     self.assertEqual(ls.message, '1 2 3\n')
     self.assertEqual(ls.severity, 1)
   finally:
     # Undo both pushes even when an assertion fails, so the sink and the
     # verbosity level of this test do not leak into other tests.
     ost.PopLogSink()
     ost.PopVerbosityLevel()
Ejemplo n.º 3
0
    def A3MToCS(self, a3m_file, cs_file=None, options={}):
        """
        Converts the A3M alignment file to a column state sequence file. If
        cs_file is not given, the output file will be set to
        <:attr:`a3m_file`-basename>.seq219.

        If the file was already produced, the existing file path is returned
        without recomputing it.

        :param a3m_file: Path to input MSA as produced by :meth:`BuildQueryMSA`
        :type a3m_file: :class:`str`

        :param cs_file: Output file name (may be omitted)
        :type cs_file: :class:`str`

        :param options: Dictionary of options to *cstranslate*, one "-" is added
                        in front of every key. Boolean True values add flag
                        without value. The dictionary is only read, never
                        modified.
        :type options: :class:`dict`

        :return: Path to the column state sequence file, or None if
                 *cstranslate* did not report that it wrote the file (a
                 warning is logged in that case)
        :rtype: :class:`str`
        """
        # Local import keeps this method self-contained.
        from shlex import quote

        cstranslate = os.path.join(self.hhlib_dir, 'bin', 'cstranslate')
        if not cs_file:
            cs_file = '%s.seq219' % os.path.splitext(a3m_file)[0]
        if os.path.exists(cs_file):
            return cs_file
        opt_cmd, _ = _ParseOptions(options)
        # The command is run through a shell because the options arrive as a
        # pre-formatted string; every path is quoted so that blanks or shell
        # metacharacters in file names cannot split or alter the command.
        cs_cmd = '%s -i %s -o %s %s' % (
            quote(cstranslate),
            quote(os.path.abspath(a3m_file)),
            quote(os.path.abspath(cs_file)),
            opt_cmd)
        ost.LogVerbose('converting %s to %s' % (a3m_file, cs_file))
        job = subprocess.Popen(cs_cmd, shell=True, cwd=self.working_dir,
                               stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        sout, serr = job.communicate()
        # Success is detected from the tool's stdout message.
        if b'Wrote abstract state sequence to' in sout:
            return cs_file

        ost.LogWarning('Creating column state sequence file (%s) failed' % \
                       cs_file)
        # Forward whatever the tool wrote to stderr so the failure can be
        # diagnosed instead of being silently dropped.
        for line in serr.decode(errors='replace').splitlines():
            ost.LogWarning(line)
        return None
Ejemplo n.º 4
0
    def A3MToProfile(self, a3m_file, hhm_file=None):
        """
        Converts the A3M alignment file to a hhm profile. If hhm_file is not
        given, the output file will be set to <:attr:`a3m_file`-basename>.hhm.

        The produced HHM file can be parsed by :func:`ParseHHM`.

        If the file was already produced, the existing file path is returned
        without recomputing it.

        Everything *hhmake* writes to stderr is logged as warning, its stdout
        is logged on verbose level.

        :param a3m_file: Path to input MSA as produced by :meth:`BuildQueryMSA`
        :type a3m_file: :class:`str`

        :param hhm_file: Desired output file name (may be omitted)
        :type hhm_file: :class:`str`

        :return: Path to the profile file
        :rtype: :class:`str`

        :raises: :class:`IOError` if *hhmake* cannot be started or exits with
                 a non-zero return code
        """
        hhmake = os.path.join(self.bin_dir, 'hhmake')
        if not hhm_file:
            hhm_file = '%s.hhm' % os.path.splitext(a3m_file)[0]
        if os.path.exists(hhm_file):
            return hhm_file
        ost.LogVerbose('converting %s to %s' % (a3m_file, hhm_file))
        # Process-wide side effect: HHLIB stays set for later child processes.
        os.putenv('HHLIB', self.hhlib_dir)
        # Pass the arguments as a list (no shell) so that paths containing
        # blanks or shell metacharacters reach hhmake unchanged.
        job = subprocess.Popen([hhmake, '-i', a3m_file, '-o', hhm_file],
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
        sout, serr = job.communicate()
        for line in serr.decode().splitlines():
            ost.LogWarning(line)
        for line in sout.decode().splitlines():
            ost.LogVerbose(line)
        if job.returncode != 0:
            raise IOError('could not convert a3m to hhm file')
        return hhm_file
Ejemplo n.º 5
0
def _ParseAmberForceField(filename):
    """Read frcmod file produced by parmchk2 and return dictionary with all
  entries for masses, bonds, angles, dihedrals, impropers and non-bonded (LJ)
  interactions. Stored as key/list-of-value pairs:
  - 'MASS': [atype, mass]
  - 'BOND': [atype1, atype2, d0, k]
  - 'ANGL': [atype1, atype2, atype3, a0, k]
  - 'DIHE': [atype1, atype2, atype3, atype4, idiv, period, phase, k/idiv]
  - 'IMPR': [atype1, atype2, atype3, atype4, period, phase, k]
  - 'NONB': [Rvdw, epsilon]
  """
    keywords = ['MASS', 'BOND', 'ANGL', 'DIHE', 'IMPR', 'NONB']
    with open(filename, 'r') as in_file:
        ff_dict = {}
        for line in in_file:
            # look for keywords
            keyword = line[:4]
            if not keyword in keywords: continue
            # loop until empty line found
            ff_dict[keyword] = []
            line = next(in_file)
            while len(line.strip()) > 0:
                # check for warnings
                if 'ATTN' in line:
                    ost.LogWarning('The following line in ' + filename + ' (' + keyword +\
                                   ' section) needs revision:\n' + line.strip())
                # fixed column format -> extract entries dep. on current keyword
                if keyword == 'MASS':
                    atype = line[0:2].strip()
                    s = line[2:].split()
                    mass = float(s[0])
                    ff_dict[keyword].append([atype, mass])
                elif keyword == 'BOND':
                    atype1 = line[:2].strip()
                    atype2 = line[3:5].strip()
                    s = line[5:].split()
                    k = float(s[0])
                    d0 = float(s[1])
                    ff_dict[keyword].append([atype1, atype2, d0, k])
                elif keyword == 'ANGL':
                    atype1 = line[:2].strip()
                    atype2 = line[3:5].strip()
                    atype3 = line[6:8].strip()
                    s = line[8:].split()
                    k = float(s[0])
                    a0 = float(s[1])
                    ff_dict[keyword].append([atype1, atype2, atype3, a0, k])
                elif keyword == 'DIHE':
                    atype1 = line[:2].strip()
                    atype2 = line[3:5].strip()
                    atype3 = line[6:8].strip()
                    atype4 = line[9:11].strip()
                    s = line[11:].split()
                    idiv = float(s[0])
                    k = float(s[1])
                    phase = float(s[2])
                    # negative periods = there is more than one term for that dihedral
                    # -> no need to do anything special about that here...
                    period = abs(float(s[3]))
                    ff_dict[keyword].append([
                        atype1, atype2, atype3, atype4, idiv, period, phase,
                        k / float(idiv)
                    ])
                elif keyword == 'IMPR':
                    atype1 = line[:2].strip()
                    atype2 = line[3:5].strip()
                    atype3 = line[6:8].strip()
                    atype4 = line[9:11].strip()
                    s = line[11:].split()
                    k = float(s[0])
                    phase = float(s[1])
                    period = float(s[2])
                    ff_dict[keyword].append(
                        [atype1, atype2, atype3, atype4, period, phase, k])
                elif keyword == 'NONB':
                    line = line.strip()
                    atype = line[0:2].strip()
                    s = line[2:].split()
                    Rvdw = float(s[0])
                    epsilon = float(s[1])
                    ff_dict[keyword].append([atype, Rvdw, epsilon])
                # next...
                line = next(in_file)
    return ff_dict
Ejemplo n.º 6
0
def compare_structures(model, reference):
    """ Compare the structures and return lDDT values.

    Both structures are saved as PDB files and compared by running
    ``ost compare-structures`` (with ``--lddt`` and
    ``--residue-number-alignment``) in a subprocess; its JSON output is then
    read back.

    :param model: Model structure, saved with ``ost.io.SavePDB``
    :param reference: Reference structure, saved with ``ost.io.SavePDB``

    :return: Dictionary with the keys ``weighted_lddt``, ``oligo_lddt``,
             ``single_chain_lddt`` (best single chain score), ``best_chain``,
             ``coverage`` and ``chain_mapping``. All values are None if the
             tool failed on an empty reference, or if it succeeded without
             producing the expected result entry.
    :rtype: :class:`dict`

    :raises: :class:`RuntimeError` if the tool returns a non-zero exit code
             although the reference is not empty
    """
    def _missing_values():
        # Result used whenever no scores could be obtained.
        return {
            'weighted_lddt': None,
            'oligo_lddt': None,
            'single_chain_lddt': None,
            'coverage': None,
            'chain_mapping': None,
            'best_chain': None,
        }

    # NOTE(review): the files are created with delete=False and are not
    # removed here; whether temporary_file cleans them up is not visible
    # from this function - confirm.
    with temporary_file(delete=False) as model_file, \
            temporary_file(delete=False) as target_file, \
            temporary_file(delete=False) as output_file:
        # We'll open/close the file later by name so we don't want python
        # to mess up
        model_file.close()
        target_file.close()
        output_file.close()
        ost.io.SavePDB(model, model_file.name)
        ost.io.SavePDB(reference, target_file.name)
        cmd = [
            '/usr/bin/env',
            'ost',
            'compare-structures',
            '-r',
            target_file.name,
            '-m',
            model_file.name,
            '-o',
            output_file.name,
            '--lddt',
            '-v',
            str(ost.GetVerbosityLevel()),
            '--residue-number-alignment'  # do not re-align
        ]
        proc = subprocess.Popen(cmd,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
        _, stderr = proc.communicate()
        # The pipe delivers bytes; decode once so the log shows readable text
        # rather than a bytes repr.
        stderr_text = stderr.decode(errors='replace')
        if proc.returncode != 0:
            if reference.atom_count == 0:
                # Reference is empty, probably because it covered an other
                # portion of the target sequence that doesn't include our
                # domain of interest. We keep track of this hit with missing
                # values (None)
                ost.LogWarning(
                    "The reference structure appears to be empty. " +
                    "This probably means it didn't cover this " +
                    "domain. Returning missing values.")
                return _missing_values()
            ost.LogError(
                "ost compare-structures returned %s, error stream included below "
                % proc.returncode)
            ost.LogError(stderr_text)
            raise RuntimeError("ost compare-structure returned %s" %
                               proc.returncode)
        # Context manager closes the result file deterministically.
        with open(output_file.name) as json_file:
            compare_data = json.load(json_file)
        try:
            # Results are keyed by model file name, then reference file name.
            compare_results = compare_data['result'][os.path.basename(
                model_file.name)][os.path.basename(target_file.name)]
        except KeyError:
            ost.LogError("ost compare-structures retured a success code but " +
                         "the expected data is not available. This is " +
                         "probably caused by an acceptable error such as " +
                         "a failure in chain mapping. Returning missing " +
                         "values. STDERR output included below: ")
            ost.LogError(stderr_text)
            return _missing_values()

        # What is the coverage of the model?
        coverages = []
        for aln_str in compare_results['info']['mapping']['alignments']:
            aln = ost.io.AlignmentFromString(str(aln_str), 'fasta')
            # Verify the alignment is pairwise before reading sequence 1.
            assert aln.GetCount() == 2
            coverages.append(aln.GetCoverage(1))

        lddt_results = compare_results['lddt']
        best_single_chain_lddt = 0
        best_chain = None
        for sc_lddt in lddt_results['single_chain_lddt']:
            assert sc_lddt['status'] == 'SUCCESS'
            # '>=' on purpose: ties go to the later chain and a chain scoring
            # 0 is still reported as best chain.
            if sc_lddt['global_score'] >= best_single_chain_lddt:
                best_single_chain_lddt = sc_lddt['global_score']
                best_chain = sc_lddt['model_chain']

        assert lddt_results['oligo_lddt']['status'] == 'SUCCESS'
        assert lddt_results['weighted_lddt']['status'] == 'SUCCESS'

        return {
            'weighted_lddt': lddt_results['weighted_lddt']['global_score'],
            'oligo_lddt': lddt_results['oligo_lddt']['global_score'],
            'single_chain_lddt': best_single_chain_lddt,
            'best_chain': best_chain,
            'coverage': max(coverages),
            'chain_mapping':
            compare_results['info']['mapping']['chain_mapping'],
        }
Ejemplo n.º 7
0
    def BuildQueryMSA(self, nrdb, options={}, a3m_file=None):
        """Builds the MSA for the query sequence.

        This function directly uses hhblits of hhtools. While in theory it would
        be possible to do this by PSI-blasting on our own, hhblits is supposed
        to be faster. Also it is supposed to prevent alignment corruption. The
        alignment corruption is caused by low-scoring terminal alignments that
        draw the sequences found by PSI-blast away from the optimum. By removing
        these low scoring ends, part of the alignment corruption can be
        suppressed.

        hhblits does **not** call PSIPRED on the MSA to predict the secondary
        structure of the query sequence. This is done by addss.pl of hhtools.
        The predicted secondary structure is stored together with the sequences
        identified by hhblits.

        The produced A3M file can be parsed by :func:`ParseA3M`. If the file was
        already produced, hhblits is not called again and the existing file path
        is returned.

        Failures are only reported as log warnings: the path is returned even
        if hhblits produced no output or addss.pl reported an error.

        :param nrdb: Database to be align against; has to be an hhblits database
        :type nrdb: :class:`str`

        :param options: Dictionary of options to *hhblits*, one "-" is added in
                        front of every key. Boolean True values add flag without
                        value. Merged with default options {'cpu': 1, 'n': 1},
                        where 'n' defines the number of iterations. The
                        dictionary is only read, never modified.
        :type options: :class:`dict`

        :param a3m_file: a path of a3m_file to be used, optional
        :type a3m_file: :class:`str`

        :return: The path to the A3M file containing the MSA
        :rtype: :class:`str`
        """
        # Local import keeps this method self-contained.
        from shlex import quote

        def _Quoted(path):
            # Shell-escape a path so blanks or metacharacters cannot split or
            # alter the command line.
            return quote(str(path))

        if a3m_file is None:
            a3m_file = '%s.a3m' % os.path.splitext(self.filename)[0]
        else:
            a3m_file = os.path.abspath(a3m_file)
        if os.path.exists(a3m_file):
            ost.LogInfo('Reusing already existing query alignment (%s)' % a3m_file)
            return a3m_file
        ost.LogInfo('Using hhblits from "%s"' % self.hhsuite_root)
        # Only the directory part is made absolute: the last path component
        # is kept untouched.
        nrdb_dir, nrdb_name = os.path.split(nrdb)
        full_nrdb = os.path.join(os.path.abspath(nrdb_dir), nrdb_name)
        # create MSA
        opts = {'cpu' : 1, # no. of cpus used
                'n'   : 1}   # no. of iterations
        opts.update(options)
        opt_cmd, _ = _ParseOptions(opts)
        # Run through a shell because the options arrive as a pre-formatted
        # string; all paths are quoted.
        hhblits_cmd = '%s -e 0.001 -i %s -oa3m %s -d %s %s' % \
                      (_Quoted(self.hhblits_bin), _Quoted(self.filename),
                       _Quoted(a3m_file), _Quoted(full_nrdb), opt_cmd)
        job = subprocess.Popen(hhblits_cmd, shell=True, cwd=self.working_dir,
                               stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        sout, _ = job.communicate()
        for line in sout.decode().splitlines():
            ost.LogVerbose(line.strip())
        if not os.path.exists(a3m_file):
            ost.LogWarning('Building query profile failed, no output')
            return a3m_file
        # add secondary structure annotation
        scripts_dir = os.path.join(self.hhsuite_root, 'lib/hh/scripts')
        addss_cmd = "%s %s" % (_Quoted(os.path.join(self.hhsuite_root,
                                                    'lib/hh/scripts/addss.pl')),
                               _Quoted(a3m_file))
        env = dict(os.environ)
        env.update({'PERL5LIB' : scripts_dir,
                    'HHLIB' : self.hhlib_dir,
                    # fall back to the default search path instead of raising
                    # KeyError when PATH is not set
                    'PATH' : '%s:%s' % (os.path.join(self.hhsuite_root, 'bin'),
                                        os.environ.get('PATH', os.defpath))})
        job = subprocess.Popen(addss_cmd, shell=True, cwd=self.working_dir,
                               env=env, stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
        sout, _ = job.communicate()
        for line in sout.decode().splitlines():
            if 'error' in line.lower():
                # Only the first offending output line is reported.
                ost.LogWarning('Predicting secondary structure for MSA '+
                               '(%s) failed, on command: %s' % (a3m_file, line))
                return a3m_file
        return a3m_file