def test_parse(self):
    """Parse a known TMalign log and verify every extracted statistic."""
    log_path = os.path.join(self.testfiles_dir, "tmalign.log")
    parser = tm_parser.TMalignLogParser()
    parser.parse(log_path)
    # Expected values taken from the reference tmalign.log fixture
    self.assertEqual(143, parser.nr_residues_common)
    self.assertEqual(0.70502, parser.tm)
    self.assertEqual(2.68, parser.rmsd)
    self.assertEqual(0.182, parser.seq_id)
def comparison(self, models, structures):
    """ Compare a list of model structures to a second list of reference structures

    Parameters
    ----------
    models : list
       List containing the paths to the model structure files
    structures : list
       List containing the paths to the reference structure files

    Returns
    -------
    entries : list
       List of TMscore data entries on a per-model basis

    Raises
    ------
    RuntimeError
       If either input list is empty, the list lengths differ, or
       ``self.method`` is not a recognised comparison algorithm
    """
    # --- Validate the inputs -------------------------------------------
    if len(models) < 1 or len(structures) < 1:
        msg = 'No model structures provided' if len(models) < 1 \
            else 'No reference structures provided'
        logger.critical(msg)
        raise RuntimeError(msg)
    elif len(structures) == 1:
        # A single reference is compared against every model
        logger.info(
            'Using single structure provided for all model comparisons')
        structures = [structures[0] for _ in xrange(len(models))]
    elif len(models) != len(structures):
        msg = "Unequal number of models and structures!"
        logger.critical(msg)
        raise RuntimeError(msg)

    # --- Pick the log parser matching the selected algorithm -----------
    if self.method == "tmalign":
        pt = tm_parser.TMalignLogParser()
    elif self.method == "tmscore":
        pt = tm_parser.TMscoreLogParser()
    else:
        # BUG FIX: the original assigned a tuple here
        # ("Invalid method selected: %s", self.method), so both the log
        # entry and the RuntimeError carried a tuple instead of a string.
        msg = "Invalid method selected: %s" % self.method
        logger.critical(msg)
        raise RuntimeError(msg)

    logger.info('Using algorithm: {0}'.format(self.method))
    logger.info('------- Evaluating decoys -------')

    # --- Build one comparison job script per (model, reference) pair ---
    data_entries, job_scripts, log_files = [], [], []
    for model_pdb, structure_pdb in zip(models, structures):
        model_name = os.path.splitext(os.path.basename(model_pdb))[0]
        structure_name = os.path.splitext(
            os.path.basename(structure_pdb))[0]
        stem = "_".join([model_name, structure_name, self.method])

        # Guard clause: warn about missing files and skip the pair so the
        # three bookkeeping lists stay index-aligned.
        if not (os.path.isfile(model_pdb) and os.path.isfile(structure_pdb)):
            if not os.path.isfile(model_pdb):
                logger.warning("Cannot find: %s", model_pdb)
            if not os.path.isfile(structure_pdb):
                logger.warning("Cannot find: %s", structure_pdb)
            continue

        data_entries.append(
            [model_name, structure_name, model_pdb, structure_pdb])
        script = make_script(
            [self.executable, model_pdb, structure_pdb],
            prefix="tmscore_", stem=stem, directory=self.tmp_dir)
        job_scripts.append(script)
        log_files.append(os.path.splitext(script)[0] + ".log")

    # --- Submit the jobs and wait for completion -----------------------
    logger.info('Executing TManalysis scripts')
    j = Job(self._qtype)
    j.submit(job_scripts, nproc=self._nproc,
             max_array_jobs=self._max_array_jobs,
             queue=self._queue, name="tmscore")
    j.wait(interval=1)

    # --- Parse each log and store one entry per model ------------------
    self.entries = []
    for entry, log, script in zip(data_entries, log_files, job_scripts):
        try:
            # Reset the parser so values from a previous log cannot leak
            pt.reset()
            pt.parse(log)
        except Exception:
            logger.critical("Error processing the %s log file: %s",
                            self.method, log)
            # Preserve original behaviour: record the literal string "None"
            log = "None"
        model_name, structure_name, model_pdb, structure_pdb = entry
        _entry = self._store(model_name, structure_name, model_pdb,
                             structure_pdb, log, pt)
        self.entries.append(_entry)
        os.unlink(script)
    return self.entries
def comparison(self, models, structures):
    """ Compare a list of model structures to a second list of reference structures

    Parameters
    ----------
    models : list
       List containing the paths to the model structure files
    structures : list
       List containing the paths to the reference structure files

    Returns
    -------
    entries : list
       List of TMscore data entries on a per-model basis

    Raises
    ------
    RuntimeError
       If either input list is empty, the list lengths differ,
       ``self.method`` is not a recognised algorithm, or the
       script execution reports failure
    """
    # --- Validate the inputs -------------------------------------------
    if len(models) < 1 or len(structures) < 1:
        msg = 'No model structures provided' if len(models) < 1 else \
            'No reference structures provided'
        logger.critical(msg)
        raise RuntimeError(msg)
    elif len(structures) == 1:
        # A single reference is compared against every model
        logger.info(
            'Using single structure provided for all model comparisons')
        structures = [structures[0] for _ in xrange(len(models))]
    elif len(models) != len(structures):
        msg = "Unequal number of models and structures"
        logger.critical(msg)
        raise RuntimeError(msg)

    # Create a logfile parser for the selected algorithm
    if self.method == "tmalign":
        pt = tm_parser.TMalignLogParser()
    elif self.method == "tmscore":
        pt = tm_parser.TMscoreLogParser()
    else:
        # BUG FIX: the original assigned a tuple here
        # ("Invalid method selected: ", self.method), so both the log
        # entry and the RuntimeError carried a tuple instead of a string.
        msg = "Invalid method selected: {0}".format(self.method)
        logger.critical(msg)
        raise RuntimeError(msg)

    # =======================================================================
    # Iterate through the structure files and execute the TMscore comparisons
    # =======================================================================
    logger.info('Using algorithm: {0}'.format(self.method))
    logger.info('------- Evaluating decoys -------')

    # Construct the job scripts
    data_entries = []  # Store some data
    job_scripts = []   # Hold job scripts
    log_files = []     # Hold paths to log files
    for model_pdb, structure_pdb in zip(models, structures):
        # Some file names
        model_name = os.path.splitext(os.path.basename(model_pdb))[0]
        structure_name = os.path.splitext(
            os.path.basename(structure_pdb))[0]
        prefix = '{0}_{1}_{2}'.format(model_name, structure_name,
                                      self.method)

        # Guard clauses: skip pairs with missing files so the three
        # bookkeeping lists stay index-aligned.
        if not os.path.isfile(model_pdb):
            logger.warning("Cannot find: {0}".format(model_pdb))
            continue
        elif not os.path.isfile(structure_pdb):
            logger.warning("Cannot find: {0}".format(structure_pdb))
            continue

        # Create the run script (delete=False so it survives close() and
        # can be executed by the worker pool)
        script = tempfile.NamedTemporaryFile(prefix=prefix,
                                             suffix=ample_util.SCRIPT_EXT,
                                             delete=False)
        script.write(ample_util.SCRIPT_HEADER + os.linesep * 2)
        script.write('{exe} {model} {reference} {sep}{sep}'.format(
            exe=self.executable,
            model=model_pdb,
            reference=structure_pdb,
            sep=os.linesep,
        ))
        script.close()
        # NOTE(review): 0o777 is very permissive — only the executable bit
        # is needed; confirm before tightening to 0o755.
        os.chmod(script.name, 0o777)
        job_scripts.append(script.name)

        # Save some more information
        data_entries.append(
            [model_name, structure_name, model_pdb, structure_pdb])
        log_files.append(os.path.splitext(script.name)[0] + ".log")

    # Execute the scripts; logging is disabled so worker chatter does not
    # flood the main log
    logger.info('Executing TManalysis scripts')
    logger.disabled = True
    success = workers_util.run_scripts(
        job_scripts=job_scripts,
        monitor=None,
        check_success=None,
        early_terminate=None,
        nproc=self._nproc,
        job_time=7200,  # Might be too long/short, taken from Rosetta modelling
        job_name='tm_analysis',
        submit_cluster=self._submit_cluster,
        submit_qtype=self._submit_qtype,
        submit_queue=self._submit_queue,
        submit_array=self._submit_array,
        submit_max_array=self._submit_max_array)
    logger.disabled = False

    if not success:
        msg = "Error running TManalysis"
        raise RuntimeError(msg)

    # Extract the data
    entries = []
    for entry, log, script in zip(data_entries, log_files, job_scripts):
        try:
            # Reset the TM log parser to default values so results from a
            # previous iteration cannot leak into this entry
            pt.reset()
            # Parse the TM method logfile to extract the data
            pt.parse(log)
        except Exception:
            msg = "Error processing the {0} log file: {1}".format(
                self.method, log)
            logger.critical(msg)
            # Preserve original behaviour: record the literal string "None"
            log = "None"
        model_name, structure_name, model_pdb, structure_pdb = entry
        _entry = self._store(model_name, structure_name, model_pdb,
                             structure_pdb, log, pt)
        entries.append(_entry)
        os.unlink(script)

    self.entries = entries
    return entries