Example #1
 def benchmarking(self, optd):
     if optd['submit_cluster']:
         # Pickle dictionary so it can be opened by the job to get the parameters
         ample_util.save_amoptd(optd)
         script = benchmark_util.cluster_script(optd)
         workers_util.run_scripts(
             job_scripts=[script],
             monitor=monitor,
             nproc=optd['nproc'],
             job_time=43200,
             job_name='benchmark',
             submit_cluster=optd['submit_cluster'],
             submit_qtype=optd['submit_qtype'],
             submit_queue=optd['submit_queue'],
             submit_pe_lsf=optd['submit_pe_lsf'],
             submit_pe_sge=optd['submit_pe_sge'],
             submit_array=optd['submit_array'],
             submit_max_array=optd['submit_max_array'],
         )
         # queue finished so unpickle results
         optd.update(ample_util.read_amoptd(optd['results_path']))
     else:
         benchmark_util.analyse(optd)
         ample_util.save_amoptd(optd)
     return
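The cluster branch above round-trips the AMPLE options dictionary through a pickle file: the parent saves optd before submission, the submitted job reads it to get its parameters, and the parent then updates optd from optd['results_path'] once the queue has finished. A minimal sketch of that pattern, assuming save_amoptd/read_amoptd are thin pickle wrappers around results_path (the real ample_util helpers may do more):

    import cPickle

    def save_amoptd_sketch(optd):
        # Hypothetical stand-in for ample_util.save_amoptd: pickle the options
        # dictionary to the path recorded in 'results_path'.
        with open(optd['results_path'], 'wb') as f:
            cPickle.dump(optd, f)

    def read_amoptd_sketch(results_path):
        # Hypothetical stand-in for ample_util.read_amoptd: unpickle and return
        # the dictionary written by the finished job.
        with open(results_path, 'rb') as f:
            return cPickle.load(f)

    # Parent-side usage mirroring benchmarking():
    #   save_amoptd_sketch(optd)        # the job script can now read its parameters
    #   ...submit the job and wait for the queue...
    #   optd.update(read_amoptd_sketch(optd['results_path']))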
Example #2
 def run(self,
         nproc=1,
         dry_run=False,
         clean_up=True, 
         rosetta_dir=None,
         **kwargs):
     """Run the integration testing jobs and then the unittests to test them.
     
     In all cases jobs are run on a single processor. When running on a cluster, the
     ample job scripts have the queue directives added to them and each ample job
     is launched on the head node. The individual jobs then submit their various job
     stages to the queue, and the integration test job just manages running all the
     individual ample jobs until they have finished. Although this means lots of jobs
     running on the head node, the actual computation done there should be minimal,
     as all processing is submitted to the queue.

     Previously, when running on a cluster, we created a single-processor serial ample
     script for each job and then submitted a single array job to run them all on the
     cluster. This approach had to be abandoned as (I think) the individual jobs timed out.
     """
     logger.info("Writing files to: {0}".format(self.run_dir))
     
     if dry_run:
         clean_up = False
     
     if rosetta_dir and not os.path.isdir(rosetta_dir):
         print("Cannot find rosetta_dir: {0}".format(rosetta_dir))
         sys.exit(1)
     
     if clean_up:
         self.clean()
         
     scripts = self._create_scripts(rosetta_dir, **kwargs)
     if not len(scripts):
         raise RuntimeError("Could not find any test cases to run!")
     
     logger.info("The following test cases will be run:")
     for name in self.test_dict.keys():
         logger.info("{0}: {1}".format(name, self.run_dir))
     
     ## Run all the jobs
     # If we're running on a cluster, we run on as many processors as there are jobs, 
     # as the jobs are just sitting and monitoring the queue
     if kwargs.get('submit_cluster'):
         logger.info("Jobs will be submitted to a cluster queueing system")
         nproc = len(scripts)
     
     if not dry_run:
         workers_util.run_scripts(job_scripts=scripts,
                                  monitor=None,
                                  nproc=nproc,
                                  job_name='test')
     
     # Now check the results using the unittesting framework
     self.run_unittest_suite()
     return 
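Every example on this page funnels into the same workers_util.run_scripts entry point: a list of executable job scripts, an optional monitor callback, the number of local processors, and (optionally) cluster submission keywords; it returns a success flag. A minimal local-only usage sketch, with throwaway shell scripts standing in for real ample jobs (script names and contents here are purely illustrative):

    import os
    import stat
    import tempfile

    from ample.util import workers_util

    # Write two trivial job scripts; real jobs would be generated ample run scripts.
    scripts = []
    for i in range(2):
        fh = tempfile.NamedTemporaryFile(mode='w', suffix='.sh', delete=False)
        fh.write('#!/bin/sh\necho "job %d done"\n' % i)
        fh.close()
        os.chmod(fh.name, stat.S_IRWXU)
        scripts.append(fh.name)

    # Run them locally, one processor per script; run_scripts returns True/False
    # (compare 'ok = workers_util.run_scripts(...)' in the MRBUMP examples below).
    ok = workers_util.run_scripts(job_scripts=scripts,
                                  monitor=None,
                                  nproc=len(scripts),
                                  job_name='sketch')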
Example #3
    def run(self,
            nproc=1,
            dry_run=False,
            clean_up=True,
            rosetta_dir=None,
            **kwargs):
        """Run the integration testing jobs and then the unittests to test them.
        
        In all cases jobs are run on a single processor. When running on a cluster, the
        ample job scripts have the queue directives added to them and each ample job
        is launched on the head node. The individual jobs then submit their various job
        stages to the queue, and the integration test job just manages running all the
        individual ample jobs until they have finished. Although this means lots of jobs
        running on the head node, the actual computation done there should be minimal,
        as all processing is submitted to the queue.

        Previously, when running on a cluster, we created a single-processor serial ample
        script for each job and then submitted a single array job to run them all on the
        cluster. This approach had to be abandoned as (I think) the individual jobs timed out.
        """
        logger.info("Writing files to: {0}".format(self.run_dir))

        if dry_run:
            clean_up = False

        if rosetta_dir and not os.path.isdir(rosetta_dir):
            logger.debug("Cannot find rosetta_dir: {0}".format(rosetta_dir))
            sys.exit(1)

        if clean_up:
            self.clean()

        scripts = self._create_scripts(rosetta_dir, **kwargs)
        if not len(scripts):
            raise RuntimeError("Could not find any test cases to run!")

        logger.info("The following test cases will be run:")
        for name in self.test_dict.keys():
            logger.info("{0}: {1}".format(name, self.run_dir))

        ## Run all the jobs
        # If we're running on a cluster, we run on as many processors as there are jobs,
        # as the jobs are just sitting and monitoring the queue
        if kwargs.get('submit_cluster'):
            logger.info("Jobs will be submitted to a cluster queueing system")
            nproc = len(scripts)

        if not dry_run:
            workers_util.run_scripts(job_scripts=scripts,
                                     monitor=None,
                                     nproc=nproc,
                                     job_name='test')

        # Now check the results using the unittesting framework
        self.run_unittest_suite()
        return
Example #4
 def benchmarking(self, optd):
     if optd['submit_cluster']:
         # Pickle dictionary so it can be opened by the job to get the parameters
         ample_util.save_amoptd(optd)
         script = benchmark_util.cluster_script(optd)
         workers_util.run_scripts(
             job_scripts=[script],
             monitor=monitor,
             nproc=optd['nproc'],
             job_time=43200,
             job_name='benchmark',
             submit_cluster=optd['submit_cluster'],
             submit_qtype=optd['submit_qtype'],
             submit_queue=optd['submit_queue'],
             submit_pe_lsf=optd['submit_pe_lsf'],
             submit_pe_sge=optd['submit_pe_sge'],
             submit_array=optd['submit_array'],
             submit_max_array=optd['submit_max_array'])
         # queue finished so unpickle results
         optd.update(ample_util.read_amoptd(optd['results_path']))
     else:
         benchmark_util.analyse(optd)
         ample_util.save_amoptd(optd)
     return
Example #5
    def molecular_replacement(self, optd):

        if not optd['mrbump_scripts']:
            # MRBUMP analysis of the ensembles
            logger.info('----- Running MRBUMP on ensembles--------\n\n')
            if len(optd['ensembles']) < 1:
                msg = "ERROR! Cannot run MRBUMP as there are no ensembles!"
                exit_util.exit_error(msg)

            if optd['mrbump_dir'] is None:
                bump_dir = os.path.join(optd['work_dir'], 'MRBUMP')
                optd['mrbump_dir'] = bump_dir
            else:
                bump_dir = optd['mrbump_dir']
            if not os.path.exists(bump_dir):
                os.mkdir(bump_dir)

            optd['mrbump_results'] = []
            logger.info("Running MRBUMP jobs in directory: %s", bump_dir)

            # Set an ensemble-specific phaser_rms if required
            if optd['phaser_rms'] == 'auto':
                ensembler.set_phaser_rms_from_subcluster_score(optd)

            # Sort the ensembles in a favourable way
            logger.info("Sorting ensembles")
            sort_keys = [
                'cluster_num', 'truncation_level',
                'subcluster_radius_threshold', 'side_chain_treatment'
            ]
            ensemble_pdbs_sorted = ensembler.sort_ensembles(
                optd['ensembles'],
                optd['ensembles_data'],
                keys=sort_keys,
                prioritise=True)

            # Create job scripts
            logger.info("Generating MRBUMP runscripts")
            optd['mrbump_scripts'] = mrbump_util.write_mrbump_files(
                ensemble_pdbs_sorted,
                optd,
                job_time=mrbump_util.MRBUMP_RUNTIME,
                ensemble_options=optd['ensemble_options'],
                directory=bump_dir)

        # Create function for monitoring jobs - static function decorator?
        if self.ample_output:

            def monitor():
                r = mrbump_util.ResultsSummary()
                r.extractResults(optd['mrbump_dir'], purge=bool(optd['purge']))
                optd['mrbump_results'] = r.results
                return self.ample_output.display_results(optd)
        else:
            monitor = None

        # Save results here so that we have the list of scripts and mrbump directory set
        ample_util.save_amoptd(optd)

        # Change to mrbump directory before running
        os.chdir(optd['mrbump_dir'])
        ok = workers_util.run_scripts(
            job_scripts=optd['mrbump_scripts'],
            monitor=monitor,
            check_success=mrbump_util.checkSuccess,
            early_terminate=optd['early_terminate'],
            nproc=optd['nproc'],
            job_time=mrbump_util.MRBUMP_RUNTIME,
            job_name='mrbump',
            submit_cluster=optd['submit_cluster'],
            submit_qtype=optd['submit_qtype'],
            submit_queue=optd['submit_queue'],
            submit_pe_lsf=optd['submit_pe_lsf'],
            submit_pe_sge=optd['submit_pe_sge'],
            submit_array=optd['submit_array'],
            submit_max_array=optd['submit_max_array'])

        if not ok:
            msg = "An error code was returned after running MRBUMP on the ensembles!\n" + \
                  "For further information check the logs in directory: {0}".format(optd['mrbump_dir'])
            logger.critical(msg)

        # Collect the MRBUMP results
        results_summary = mrbump_util.ResultsSummary()
        optd['mrbump_results'] = results_summary.extractResults(
            optd['mrbump_dir'], purge=bool(optd['purge']))
        optd['success'] = results_summary.success
        ample_util.save_amoptd(optd)
        summary = mrbump_util.finalSummary(optd)
        logger.info(summary)
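The monitor closure defined above is re-run while the MRBUMP jobs execute so the results view stays current; together with check_success and early_terminate, run_scripts can stop the whole batch as soon as one job produces a solution. A sketch of that wiring with a hypothetical success callback in place of mrbump_util.checkSuccess (whose signature and internals are assumptions here):

    import os

    def job_succeeded(job_script):
        # Hypothetical success test: treat a job as successful if it left a
        # '<script>.ok' marker file next to itself. The callback actually used
        # above is mrbump_util.checkSuccess, which is not shown on this page.
        return os.path.isfile(os.path.splitext(job_script)[0] + '.ok')

    # Sketch of the wiring: monitor keeps the results view fresh while the pool
    # polls it, and check_success plus early_terminate let run_scripts stop the
    # remaining jobs once one is judged successful.
    #
    #   ok = workers_util.run_scripts(job_scripts=optd['mrbump_scripts'],
    #                                 monitor=monitor,
    #                                 check_success=job_succeeded,
    #                                 early_terminate=True,
    #                                 nproc=optd['nproc'],
    #                                 job_name='mrbump')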
Example #6
    def ensembling(self, optd):
        if optd['import_ensembles']:
            ensembler.import_ensembles(optd)
        elif optd['ideal_helices']:
            ample_util.ideal_helices(optd)
            logger.info("*** Using ideal helices to solve structure ***")
        else:
            # Import the models here instead of cluster_util.
            if optd['cluster_method'] == 'import':
                # HACK - this is certainly not how we want to do it. One flag for all (-models) in future
                optd['models'] = optd['cluster_dir']
                optd['models'] = ample_util.extract_and_validate_models(optd)

            # Check we have some models to work with
            if not (optd['single_model_mode'] or optd['models']):
                ample_util.save_amoptd(optd)
                msg = "ERROR! Cannot find any pdb files in: {0}".format(
                    optd['models_dir'])
                exit_util.exit_error(msg)
            optd['ensemble_ok'] = os.path.join(optd['work_dir'], 'ensemble.ok')
            if optd['submit_cluster']:
                # Pickle dictionary so it can be opened by the job to get the parameters
                ample_util.save_amoptd(optd)
                script = ensembler.cluster_script(optd)
                ensembler_timeout = ensembler.get_ensembler_timeout(optd)
                workers_util.run_scripts(
                    job_scripts=[script],
                    monitor=monitor,
                    nproc=optd['nproc'],
                    job_time=ensembler_timeout,
                    job_name='ensemble',
                    submit_cluster=optd['submit_cluster'],
                    submit_qtype=optd['submit_qtype'],
                    submit_queue=optd['submit_queue'],
                    submit_pe_lsf=optd['submit_pe_lsf'],
                    submit_pe_sge=optd['submit_pe_sge'],
                    submit_array=optd['submit_array'],
                    submit_max_array=optd['submit_max_array'])
                # queue finished so unpickle results
                optd.update(ample_util.read_amoptd(optd['results_path']))
            else:
                try:
                    ensembler.create_ensembles(optd)
                except Exception as e:
                    msg = "Error creating ensembles: {0}".format(e)
                    exit_util.exit_error(msg, sys.exc_info()[2])

            # Check we have something to work with
            if not os.path.isfile(optd['ensemble_ok']) or 'ensembles' not in optd.keys() or not len(optd['ensembles']):
                msg = "Problem generating ensembles!"
                exit_util.exit_error(msg)

            if not (optd['homologs'] or optd['single_model_mode']):
                ensemble_summary = ensembler.ensemble_summary(
                    optd['ensembles_data'])
                logger.info(ensemble_summary)

        # Save the results
        ample_util.save_amoptd(optd)

        # Bail here if we didn't create anything
        if not len(optd['ensembles']):
            msg = "### AMPLE FAILED TO GENERATE ANY ENSEMBLES! ###\nExiting..."
            exit_util.exit_error(msg)

        # Update results view
        if self.ample_output:
            self.ample_output.display_results(optd)
        return
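The ensemble_ok path set up above is a simple completion marker: whichever route produces the ensembles (the in-process create_ensembles call or the submitted cluster script) is expected to create the file, and the parent only continues if it exists alongside a non-empty optd['ensembles']. A minimal sketch of the convention; the worker-side touch is an assumption about what the ensembling step does on success:

    import os

    def touch_ok(path):
        # Worker side (assumed): create an empty marker file to signal success.
        open(path, 'a').close()

    def ensembling_finished(optd):
        # Parent side: mirrors the check performed in ensembling() above.
        return (os.path.isfile(optd['ensemble_ok'])
                and 'ensembles' in optd
                and len(optd['ensembles']) > 0)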
Example #7
    def comparison(self, models, structures):
        """
        Compare a list of model structures to a second list of reference structures

        Parameters
        ----------
        models : list
           List containing the paths to the model structure files
        structures : list
           List containing the paths to the reference structure files

        Returns
        -------
        entries : list
           List of TMscore data entries on a per-model basis

        """

        if len(models) < 1 or len(structures) < 1:
            msg = 'No model structures provided' if len(models) < 1 else \
                'No reference structures provided'
            logger.critical(msg)
            raise RuntimeError(msg)

        elif len(structures) == 1:
            logger.info(
                'Using single structure provided for all model comparisons')
            structures = [structures[0] for _ in xrange(len(models))]

        elif len(models) != len(structures):
            msg = "Unequal number of models and structures"
            logger.critical(msg)
            raise RuntimeError(msg)

        # Create a logfile parser
        if self.method == "tmalign":
            pt = tm_parser.TMalignLogParser()
        elif self.method == "tmscore":
            pt = tm_parser.TMscoreLogParser()
        else:
            msg = "Invalid method selected: ", self.method
            logger.critical(msg)
            raise RuntimeError(msg)

        # =======================================================================
        # Iterate through the structure files and execute the TMscore comparisons
        # =======================================================================

        logger.info('Using algorithm: {0}'.format(self.method))
        logger.info('------- Evaluating decoys -------')

        # Construct the job scripts
        data_entries = []  # Store some data
        job_scripts = []  # Hold job scripts
        log_files = []  # Hold paths to log files
        for model_pdb, structure_pdb in zip(models, structures):
            # Some file names
            model_name = os.path.splitext(os.path.basename(model_pdb))[0]
            structure_name = os.path.splitext(
                os.path.basename(structure_pdb))[0]
            prefix = '{0}_{1}_{2}'.format(model_name, structure_name,
                                          self.method)
            if not os.path.isfile(model_pdb):
                logger.warning("Cannot find: {0}".format(model_pdb))
                continue
            elif not os.path.isfile(structure_pdb):
                logger.warning("Cannot find: {0}".format(structure_pdb))
                continue
            # Create the run scripts
            script = tempfile.NamedTemporaryFile(prefix=prefix,
                                                 suffix=ample_util.SCRIPT_EXT,
                                                 delete=False)
            script.write(ample_util.SCRIPT_HEADER + os.linesep * 2)
            script.write('{exe} {model} {reference} {sep}{sep}'.format(
                exe=self.executable,
                model=model_pdb,
                reference=structure_pdb,
                sep=os.linesep,
            ))
            script.close()
            os.chmod(script.name, 0o777)
            job_scripts.append(script.name)
            # Save some more information
            data_entries.append(
                [model_name, structure_name, model_pdb, structure_pdb])
            log_files.append(os.path.splitext(script.name)[0] + ".log")

        # Execute the scripts
        logger.info('Executing TManalysis scripts')
        logger.disabled = True
        success = workers_util.run_scripts(
            job_scripts=job_scripts,
            monitor=None,
            check_success=None,
            early_terminate=None,
            nproc=self._nproc,
            job_time=7200,  # Might be too long/short, taken from Rosetta modelling
            job_name='tm_analysis',
            submit_cluster=self._submit_cluster,
            submit_qtype=self._submit_qtype,
            submit_queue=self._submit_queue,
            submit_array=self._submit_array,
            submit_max_array=self._submit_max_array)
        logger.disabled = False

        if not success:
            msg = "Error running TManalysis"
            raise RuntimeError(msg)

        # Extract the data
        entries = []
        for entry, log, script in zip(data_entries, log_files, job_scripts):

            try:
                # Reset the TM log parser to default values
                pt.reset()
                # Parse the TM method logfile to extract the data
                pt.parse(log)
            except Exception:
                msg = "Error processing the {0} log file: {1}".format(
                    self.method, log)
                logger.critical(msg)
                log = "None"

            model_name, structure_name, model_pdb, structure_pdb = entry
            _entry = self._store(model_name, structure_name, model_pdb,
                                 structure_pdb, log, pt)
            entries.append(_entry)

            os.unlink(script)

        self.entries = entries
        return entries
Example #8
    def subselect_decoys(self,
                         decoys,
                         decoy_format,
                         mode='linear',
                         subdistance_to_neighbor=24,
                         **kwargs):
        """Subselect decoys excluding those not satisfying long-distance restraints

        Parameters
        ----------
        decoys : list, tuple
           A list containing paths to decoy files
        decoy_format : str
           The file format of ``decoys``
        mode : str, optional
           The subselection mode to use
            * scaled: keep the decoys with scaled scores of >= 0.5
            * linear: keep the top half of decoys
            * cutoff: keep all decoys with satisfaction scores of >= 0.287
        subdistance_to_neighbor : int, optional
           The minimum distance between neighboring residues in the subselection [default: 24]
        **kwargs
           Job submission related keyword arguments

        Returns
        -------
        list
           A list of paths to the sub-selected decoys

        """
        from ample.util import ample_util
        from ample.util import workers_util

        # Compute the long range contact satisfaction on a per-decoy basis
        logger.info(
            'Long-range contacts are defined with sequence separation of 24+')

        # Hack a custom copy of the contact map together that we can use with the script
        # All decoys should be sequence identical and thus we can just match it to the top
        contact_map = self.contact_map

        contact_map.match(conkit.io.read(decoys[0], decoy_format).top_map,
                          inplace=True)
        tmp_contact_file = tempfile.NamedTemporaryFile(delete=False)
        conkit.io.write(tmp_contact_file.name, 'casprr', contact_map)

        # Construct the job scripts
        job_scripts = []
        log_files = []
        executable = 'conkit-precision.bat' \
            if sys.platform.startswith('win') \
            else 'conkit-precision'
        for decoy in decoys:
            decoy_name = os.path.splitext(os.path.basename(decoy))[0]
            contact_name = os.path.splitext(os.path.basename(
                self.contact_file))[0]
            prefix = '{0}_{1}_'.format(contact_name, decoy_name)
            script = tempfile.NamedTemporaryFile(prefix=prefix,
                                                 suffix=ample_util.SCRIPT_EXT,
                                                 delete=False)

            # TODO: Get the log file business working properly
            cmd = [executable, '-d', subdistance_to_neighbor]
            if StrictVersion(conkit.__version__) <= StrictVersion('0.6.3'):
                cmd += [decoy]
            else:
                cmd += [decoy, decoy_format]
            cmd += [self.sequence_file, self.sequence_format]
            cmd += [tmp_contact_file.name, 'casprr']
            script.write(ample_util.SCRIPT_HEADER + os.linesep +
                         " ".join(map(str, cmd)) + os.linesep)
            script.close()
            os.chmod(script.name, 0o777)
            job_scripts.append(script.name)
            log_files.append(os.path.splitext(script.name)[0] + ".log")

        success = workers_util.run_scripts(
            job_scripts=job_scripts,
            monitor=None,
            check_success=None,
            early_terminate=None,
            nproc=kwargs.get('nproc', 1),
            job_time=7200,  # Might be too long/short, taken from Rosetta modelling
            job_name='subselect',
            submit_cluster=kwargs.get('submit_cluster', False),
            submit_qtype=kwargs.get('submit_qtype', None),
            submit_queue=kwargs.get('submit_queue', False),
            submit_array=kwargs.get('submit_array', None),
            submit_max_array=kwargs.get('submit_max_array', None),
        )

        if not success:
            msg = "Error running decoy subselection"
            raise RuntimeError(msg)

        scores = numpy.zeros(len(decoys))
        for i, (decoy, log,
                script) in enumerate(zip(decoys, log_files, job_scripts)):
            for line in open(log, 'r'):
                if line.startswith('Precision score'):
                    scores[i] = float(line.strip().split()[-1])
            os.unlink(log)
            os.unlink(script)

        logger.info('Model selection mode: %s', mode)
        if mode == 'scaled':
            keep, throw = SubselectionAlgorithm.scaled(scores)
        elif mode == 'linear':
            keep, throw = SubselectionAlgorithm.linear(scores)
        elif mode == 'cutoff':
            keep, throw = SubselectionAlgorithm.cutoff(scores)
        else:
            msg = "Unknown sub-selection mode: {0}".format(mode)
            logger.critical(msg)
            raise ValueError(msg)

        if len(keep) < 1:
            msg = "Number of decoys to keep is 0 - defaulting to keeping all"
            logger.warning(msg)
            keep, throw = range(len(decoys)), []

        logger.info('Excluding %d decoy(s) from ensembling', len(throw))

        # TODO: return the scores so we can store them in AMPLE dict
        return tuple([decoys[i] for i in keep])
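The mode options documented in the docstring ('scaled', 'linear', 'cutoff') map onto SubselectionAlgorithm class methods that take the per-decoy precision scores and return the indices to keep and to throw away. Their real implementations are not shown on this page; the functions below are approximations written purely from the docstring (keep scaled scores >= 0.5, keep the top half, keep raw scores >= 0.287):

    import numpy

    def scaled_sketch(scores):
        # Keep decoys whose score, scaled by the best score, is >= 0.5 (approximation).
        scores = numpy.asarray(scores, dtype=float)
        scaled = scores / scores.max() if scores.max() > 0 else scores
        keep = numpy.where(scaled >= 0.5)[0].tolist()
        throw = numpy.where(scaled < 0.5)[0].tolist()
        return keep, throw

    def linear_sketch(scores):
        # Keep the better-scoring half of the decoys (approximation).
        order = numpy.argsort(numpy.asarray(scores, dtype=float))[::-1]
        n_keep = (len(order) + 1) // 2
        return order[:n_keep].tolist(), order[n_keep:].tolist()

    def cutoff_sketch(scores, threshold=0.287):
        # Keep every decoy with a satisfaction score >= 0.287 (approximation).
        scores = numpy.asarray(scores, dtype=float)
        keep = numpy.where(scores >= threshold)[0].tolist()
        throw = numpy.where(scores < threshold)[0].tolist()
        return keep, throw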
Example #9
    def molecular_replacement(self, optd):
        mrbump_util.set_success_criteria(optd)
        if not optd['mrbump_scripts']:
            # MRBUMP analysis of the ensembles
            logger.info('----- Running MRBUMP on ensembles--------\n\n')
            if len(optd['ensembles']) < 1:
                msg = "ERROR! Cannot run MRBUMP as there are no ensembles!"
                exit_util.exit_error(msg)

            if optd['mrbump_dir'] is None:
                bump_dir = os.path.join(optd['work_dir'], 'MRBUMP')
                optd['mrbump_dir'] = bump_dir
            else:
                bump_dir = optd['mrbump_dir']
            if not os.path.exists(bump_dir):
                os.mkdir(bump_dir)

            optd['mrbump_results'] = []
            logger.info("Running MRBUMP jobs in directory: %s", bump_dir)

            # Set an ensemble-specific phaser_rms if required
            if optd['phaser_rms'] == 'auto':
                ensembler.set_phaser_rms_from_subcluster_score(optd)

            # Sort the ensembles in a favourable way
            logger.info("Sorting ensembles")
            sort_keys = ['cluster_num', 'truncation_level', 'subcluster_radius_threshold', 'side_chain_treatment']
            ensemble_pdbs_sorted = ensembler.sort_ensembles(
                optd['ensembles'], optd['ensembles_data'], keys=sort_keys, prioritise=True)

            # Create job scripts
            logger.info("Generating MRBUMP runscripts")
            optd['mrbump_scripts'] = mrbump_util.write_mrbump_files(
                ensemble_pdbs_sorted,
                optd,
                job_time=mrbump_util.MRBUMP_RUNTIME,
                ensemble_options=optd['ensemble_options'],
                directory=bump_dir)

        # Create function for monitoring jobs - static function decorator?
        if self.ample_output:
            def monitor():
                r = mrbump_util.ResultsSummary()
                r.extractResults(optd['mrbump_dir'], purge=bool(optd['purge']))
                optd['mrbump_results'] = r.results
                return self.ample_output.display_results(optd)
        else:
            monitor = None

        # Save results here so that we have the list of scripts and mrbump directory set
        ample_util.save_amoptd(optd)

        # Change to mrbump directory before running
        os.chdir(optd['mrbump_dir'])
        ok = workers_util.run_scripts(
            job_scripts=optd['mrbump_scripts'],
            monitor=monitor,
            check_success=mrbump_util.checkSuccess,
            early_terminate=optd['early_terminate'],
            nproc=optd['nproc'],
            job_time=mrbump_util.MRBUMP_RUNTIME,
            job_name='mrbump',
            submit_cluster=optd['submit_cluster'],
            submit_qtype=optd['submit_qtype'],
            submit_queue=optd['submit_queue'],
            submit_pe_lsf=optd['submit_pe_lsf'],
            submit_pe_sge=optd['submit_pe_sge'],
            submit_array=optd['submit_array'],
            submit_max_array=optd['submit_max_array'])

        if not ok:
            msg = "An error code was returned after running MRBUMP on the ensembles!\n" + \
                  "For further information check the logs in directory: {0}".format(optd['mrbump_dir'])
            logger.critical(msg)

        # Collect the MRBUMP results
        results_summary = mrbump_util.ResultsSummary()
        optd['mrbump_results'] = results_summary.extractResults(optd['mrbump_dir'], purge=bool(optd['purge']))
        optd['success'] = results_summary.success
        ample_util.save_amoptd(optd)
        summary = mrbump_util.finalSummary(optd)
        logger.info(summary)
Example #10
    def ensembling(self, optd):
        if optd['import_ensembles']:
            ensembler.import_ensembles(optd)
        elif optd['ideal_helices']:
            ample_util.ideal_helices(optd)
            logger.info("*** Using ideal helices to solve structure ***")
        else:
            # Check we have some models to work with
            if not (optd['single_model_mode'] or optd['processed_models']):
                ample_util.save_amoptd(optd)
                msg = "ERROR! Cannot find any pdb files in: {0}".format(optd['models_dir'])
                exit_util.exit_error(msg)
            optd['ensemble_ok'] = os.path.join(optd['work_dir'], 'ensemble.ok')
            if optd['submit_cluster']:
                # Pickle dictionary so it can be opened by the job to get the parameters
                ample_util.save_amoptd(optd)
                script = ensembler.cluster_script(optd)
                ensembler_timeout = ensembler.get_ensembler_timeout(optd)
                workers_util.run_scripts(
                    job_scripts=[script],
                    monitor=monitor,
                    nproc=optd['nproc'],
                    job_time=ensembler_timeout,
                    job_name='ensemble',
                    submit_cluster=optd['submit_cluster'],
                    submit_qtype=optd['submit_qtype'],
                    submit_queue=optd['submit_queue'],
                    submit_pe_lsf=optd['submit_pe_lsf'],
                    submit_pe_sge=optd['submit_pe_sge'],
                    submit_array=optd['submit_array'],
                    submit_max_array=optd['submit_max_array'])
                # queue finished so unpickle results
                optd.update(ample_util.read_amoptd(optd['results_path']))
            else:
                try:
                    ensembler.create_ensembles(optd)
                except Exception as e:
                    msg = "Error creating ensembles: {0}".format(e)
                    exit_util.exit_error(msg, sys.exc_info()[2])

            # Check we have something to work with
            if not os.path.isfile(optd['ensemble_ok']) or 'ensembles' not in optd.keys() or not len(optd['ensembles']):
                msg = "Problem generating ensembles!"
                exit_util.exit_error(msg)

            if not (optd['homologs'] or optd['single_model_mode']):
                ensemble_summary = ensembler.ensemble_summary(optd['ensembles_data'])
                logger.info(ensemble_summary)

        # Save the results
        ample_util.save_amoptd(optd)

        # Bail here if we didn't create anything
        if not len(optd['ensembles']):
            msg = "### AMPLE FAILED TO GENERATE ANY ENSEMBLES! ###\nExiting..."
            exit_util.exit_error(msg)

        # Update results view
        if self.ample_output:
            self.ample_output.display_results(optd)
        return
Example #11
def rerun_shelxe(args):
    logger.info('Preparing scripts')

    # Unpickle dictionary
    amopt_pkl = args.ample_pkl
    with open(args.ample_pkl, 'rb') as f:
        amoptd = cPickle.load(f)
    assert 'mrbump_results' in amoptd, "No MRBUMP results in: %s" % amopt_pkl

    # Back up old AMPLE pkl file - preserve metadata
    #assert not os.path.isfile(amopt_pkl + BK_SUFFIX)
    shutil.copy2(amopt_pkl, amopt_pkl + BK_SUFFIX)

    if True:
        # Get list of jobs to rerun the SHELXE pipeline
        job_scripts = create_scripts(amoptd, args)

        # Run the jobs
        logger.info("Running scripts:\n{0}".format(
            os.linesep.join(job_scripts)))
        ok = workers_util.run_scripts(job_scripts=job_scripts,
                                      nproc=args.nproc,
                                      submit_cluster=args.submit_cluster,
                                      submit_qtype="SGE",
                                      submit_queue="all.q",
                                      submit_array=True,
                                      submit_max_array=10)

    # Collect results from completed jobs
    for i, oldd in enumerate(amoptd['mrbump_results']):
        #if i == 1: continue
        #print("CHECKING ",oldd['Search_directory'])
        #print sorted(d.keys())

        # Add SHELXE, ARPWARP and BUCCANEER results to a dictionary
        # We need to manually set the path to the arp and bucc logfiles as
        # they may not have been set in the previous run.

        newd = {}
        if oldd['SHELXE_logfile'] and os.path.isfile(oldd['SHELXE_logfile']):
            newd = shelxe_results(oldd['SHELXE_logfile'], newd)
        if oldd['SXRARP_logfile'] and os.path.isfile(oldd['SXRARP_logfile']):
            newd = arp_results(oldd['SXRARP_logfile'], newd)
        if oldd['SXRBUCC_logfile'] and os.path.isfile(oldd['SXRBUCC_logfile']):
            newd = bucc_results(oldd['SXRBUCC_logfile'], newd)

        # Update AMPLE and MRBUMP dictionaries with new values
        mrb_pkl = os.path.join(oldd['Search_directory'], 'results',
                               'resultsTable.pkl')
        assert not os.path.isfile(mrb_pkl + BK_SUFFIX)
        #shutil.copy2(mrb_pkl, mrb_pkl + BK_SUFFIX) # Backup old MRBUMP results
        with open(mrb_pkl, 'rb') as f:
            mrb_dict = cPickle.load(f)

        # Update values in dictionaries
        for k in newd.keys():
            oldd[k] = newd[k]
            mrb_dict[oldd['name']][oldd['MR_program']][k] = newd[k]

        # Write out the updated MRBUMP dict
        with open(mrb_pkl, 'wb') as w:
            cPickle.dump(mrb_dict, w)
        #break

    # Write out the updated amoptd
    with open(amopt_pkl, 'wb') as w:
        cPickle.dump(amoptd, w)

    return job_scripts
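The per-result merge near the end assumes a nested layout for MRBUMP's resultsTable.pkl: keyed first by the search/ensemble name, then by the MR program, then by individual result fields such as the SHELXE/ARPWARP/BUCCANEER log paths. A small sketch of that assumed shape and of the update the loop performs (names and values below are illustrative placeholders):

    # Assumed, illustrative shape of the unpickled results table:
    mrb_dict = {
        'ensemble_1': {
            'PHASER': {
                'SHELXE_logfile': None,
                'SXRARP_logfile': None,
                'SXRBUCC_logfile': None,
            },
        },
    }

    # One rerun result being folded back in, mirroring the loop in rerun_shelxe():
    oldd = {'name': 'ensemble_1', 'MR_program': 'PHASER'}
    newd = {'SHELXE_logfile': '/path/to/shelxe.log'}  # placeholder value
    for k in newd.keys():
        oldd[k] = newd[k]
        mrb_dict[oldd['name']][oldd['MR_program']][k] = newd[k]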