Example #1
    def test_parse(self):
        logfile = os.path.join(self.testfiles_dir, "tmalign.log")
        TM = tm_parser.TMalignLogParser()
        TM.parse(logfile)

        self.assertEqual(143, TM.nr_residues_common)
        self.assertEqual(0.70502, TM.tm)
        self.assertEqual(2.68, TM.rmsd)
        self.assertEqual(0.182, TM.seq_id)
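
The test above shows the parser's basic contract: TMalignLogParser.parse() reads a TM-align log file and exposes the parsed values (number of aligned residues, TM-score, RMSD, sequence identity) as attributes. A minimal standalone sketch of the same usage; the log path is a placeholder, and the bare tm_parser import simply mirrors the snippets rather than a documented install path:

    # Hedged sketch: parse a TM-align log and read the parsed scores.
    # "my_tmalign.log" is a placeholder path, not a file from the project.
    import tm_parser  # imported as in the snippets on this page

    parser = tm_parser.TMalignLogParser()
    parser.parse("my_tmalign.log")
    print(parser.nr_residues_common, parser.tm, parser.rmsd, parser.seq_id)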
Example #2
File: tm_util.py Project: linucks/ample
    def comparison(self, models, structures):
        """
        Compare a list of model structures to a second list of reference structures

        Parameters
        ----------
        models : list
           List containing the paths to the model structure files
        structures : list
           List containing the paths to the reference structure files

        Returns
        -------
        entries : list
           List of TMscore data entries on a per-model basis

        """

        if len(models) < 1 or len(structures) < 1:
            msg = 'No model structures provided' if len(models) < 1 \
                else 'No reference structures provided'
            logger.critical(msg)
            raise RuntimeError(msg)

        elif len(structures) == 1:
            logger.info(
                'Using single structure provided for all model comparisons')
            structures = [structures[0] for _ in range(len(models))]

        elif len(models) != len(structures):
            msg = "Unequal number of models and structures!"
            logger.critical(msg)
            raise RuntimeError(msg)

        if self.method == "tmalign":
            pt = tm_parser.TMalignLogParser()
        elif self.method == "tmscore":
            pt = tm_parser.TMscoreLogParser()
        else:
            msg = "Invalid method selected: %s", self.method
            logger.critical(msg)
            raise RuntimeError(msg)

        logger.info('Using algorithm: {0}'.format(self.method))
        logger.info('------- Evaluating decoys -------')
        data_entries, job_scripts, log_files = [], [], []
        for model_pdb, structure_pdb in zip(models, structures):
            model_name = os.path.splitext(os.path.basename(model_pdb))[0]
            structure_name = os.path.splitext(
                os.path.basename(structure_pdb))[0]
            stem = "_".join([model_name, structure_name, self.method])

            if os.path.isfile(model_pdb) and os.path.isfile(structure_pdb):
                data_entries.append(
                    [model_name, structure_name, model_pdb, structure_pdb])
                script = make_script(
                    [self.executable, model_pdb, structure_pdb],
                    prefix="tmscore_",
                    stem=stem,
                    directory=self.tmp_dir)
                job_scripts.append(script)
                log_files.append(os.path.splitext(script)[0] + ".log")
            else:
                if not os.path.isfile(model_pdb):
                    logger.warning("Cannot find: %s", model_pdb)
                if not os.path.isfile(structure_pdb):
                    logger.warning("Cannot find: %s", structure_pdb)
                continue

        logger.info('Executing TManalysis scripts')
        j = Job(self._qtype)
        j.submit(job_scripts,
                 nproc=self._nproc,
                 max_array_jobs=self._max_array_jobs,
                 queue=self._queue,
                 name="tmscore")
        j.wait(interval=1)

        self.entries = []
        for entry, log, script in zip(data_entries, log_files, job_scripts):
            try:
                pt.reset()
                pt.parse(log)
            except Exception:
                logger.critical("Error processing the %s log file: %s",
                                self.method, log)
                log = "None"
            model_name, structure_name, model_pdb, structure_pdb = entry
            _entry = self._store(model_name, structure_name, model_pdb,
                                 structure_pdb, log, pt)
            self.entries.append(_entry)
            os.unlink(script)

        return self.entries
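
As the docstring states, comparison() expects two lists of file paths and returns one TMscore data entry per model; passing a single reference structure reuses it for every model. A hedged calling sketch; `scorer` stands in for an already-constructed comparison object from tm_util.py (the class name and its constructor are not shown in the snippets above), and the paths are placeholders:

    # Hedged sketch: only the comparison(models, structures) signature and its
    # list-of-paths arguments come from the example; `scorer` is a hypothetical,
    # already-configured instance (method, executable, queue settings set elsewhere).
    models = ["decoy_1.pdb", "decoy_2.pdb"]   # placeholder paths to model structures
    structures = ["native.pdb"]               # single reference, reused for every model
    entries = scorer.comparison(models, structures)
    print(len(entries))                       # one entry per model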
Example #3
File: tm_util.py Project: hlasimpk/ample
    def comparison(self, models, structures):
        """
        Compare a list of model structures to a second list of reference structures

        Parameters
        ----------
        models : list
           List containing the paths to the model structure files
        structures : list
           List containing the paths to the reference structure files

        Returns
        -------
        entries : list
           List of TMscore data entries on a per-model basis

        """

        if len(models) < 1 or len(structures) < 1:
            msg = 'No model structures provided' if len(models) < 1 else \
                'No reference structures provided'
            logger.critical(msg)
            raise RuntimeError(msg)

        elif len(structures) == 1:
            logger.info(
                'Using single structure provided for all model comparisons')
            structures = [structures[0] for _ in range(len(models))]

        elif len(models) != len(structures):
            msg = "Unequal number of models and structures"
            logger.critical(msg)
            raise RuntimeError(msg)

        # Create a logfile parser
        if self.method == "tmalign":
            pt = tm_parser.TMalignLogParser()
        elif self.method == "tmscore":
            pt = tm_parser.TMscoreLogParser()
        else:
            msg = "Invalid method selected: ", self.method
            logger.critical(msg)
            raise RuntimeError(msg)

        # =======================================================================
        # Iterate through the structure files and execute the TMscore comparisons
        # =======================================================================

        logger.info('Using algorithm: {0}'.format(self.method))
        logger.info('------- Evaluating decoys -------')

        # Construct the job scripts
        data_entries = []  # Store some data
        job_scripts = []  # Hold job scripts
        log_files = []  # Hold paths to log files
        for model_pdb, structure_pdb in zip(models, structures):
            # Some file names
            model_name = os.path.splitext(os.path.basename(model_pdb))[0]
            structure_name = os.path.splitext(
                os.path.basename(structure_pdb))[0]
            prefix = '{0}_{1}_{2}'.format(model_name, structure_name,
                                          self.method)
            if not os.path.isfile(model_pdb):
                logger.warning("Cannot find: {0}".format(model_pdb))
                continue
            elif not os.path.isfile(structure_pdb):
                logger.warning("Cannot find: {0}".format(structure_pdb))
                continue
            # Create the run scripts
            script = tempfile.NamedTemporaryFile(mode="w",
                                                 prefix=prefix,
                                                 suffix=ample_util.SCRIPT_EXT,
                                                 delete=False)
            script.write(ample_util.SCRIPT_HEADER + os.linesep * 2)
            script.write('{exe} {model} {reference} {sep}{sep}'.format(
                exe=self.executable,
                model=model_pdb,
                reference=structure_pdb,
                sep=os.linesep,
            ))
            script.close()
            os.chmod(script.name, 0o777)
            job_scripts.append(script.name)
            # Save some more information
            data_entries.append(
                [model_name, structure_name, model_pdb, structure_pdb])
            log_files.append(os.path.splitext(script.name)[0] + ".log")

        # Execute the scripts
        logger.info('Executing TManalysis scripts')
        logger.disabled = True
        success = workers_util.run_scripts(
            job_scripts=job_scripts,
            monitor=None,
            check_success=None,
            early_terminate=None,
            nproc=self._nproc,
            job_time=7200,  # Might be too long/short, taken from Rosetta modelling
            job_name='tm_analysis',
            submit_cluster=self._submit_cluster,
            submit_qtype=self._submit_qtype,
            submit_queue=self._submit_queue,
            submit_array=self._submit_array,
            submit_max_array=self._submit_max_array)
        logger.disabled = False

        if not success:
            msg = "Error running TManalysis"
            raise RuntimeError(msg)

        # Extract the data
        entries = []
        for entry, log, script in zip(data_entries, log_files, job_scripts):

            try:
                # Reset the TM log parser to default values
                pt.reset()
                # Parse the TM method logfile to extract the data
                pt.parse(log)
            except Exception:
                msg = "Error processing the {0} log file: {1}".format(
                    self.method, log)
                logger.critical(msg)
                log = "None"

            model_name, structure_name, model_pdb, structure_pdb = entry
            _entry = self._store(model_name, structure_name, model_pdb,
                                 structure_pdb, log, pt)
            entries.append(_entry)

            os.unlink(script)

        self.entries = entries
        return entries
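
Both versions reuse a single log parser across all models: reset() restores the parser's default attribute values before each log, and parse() fills them from that log before the results are stored via _store(). A compact sketch of that lifecycle in isolation; the log file names are placeholders, and the printed attributes are those shown in Example #1:

    # Hedged sketch of reusing one parser over several logs, as in the loops above.
    pt = tm_parser.TMalignLogParser()  # or TMscoreLogParser, chosen by the method setting
    for log in ["decoy_1_native.log", "decoy_2_native.log"]:  # placeholder log paths
        pt.reset()         # restore default values before each parse
        try:
            pt.parse(log)  # extract the scores from the TM log
        except Exception:
            continue       # the real code records the failure and still stores the entry
        print(log, pt.tm, pt.rmsd)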