Example 1
    def setup_ccp4(self, amoptd):
        """Check CCP4 is available and return the top CCP4 directory"""
        # Make sure CCP4 is around
        if not "CCP4" in os.environ:
            msg = "Cannot find CCP4 installation - please make sure CCP4 is installed and the setup scripts have been run!"
            exit_util.exit_error(msg)

        if not "CCP4_SCR" in os.environ:
            msg = "$CCP4_SCR environement variable not set - please make sure CCP4 is installed and the setup scripts have been run!"
            exit_util.exit_error(msg)

        if not os.path.isdir(os.environ['CCP4_SCR']):
            msg = "*** WARNING ***\n"
            msg += "Cannot find the $CCP4_SCR directory: {0}\n".format(
                os.environ['CCP4_SCR'])
            msg += "The directory will be created, but it should have already been created by the CCP4 startup scripts\n"
            msg += "Please make sure CCP4 is installed and the setup scripts have been run."
            logger.critical(msg)
            os.mkdir(os.environ['CCP4_SCR'])
            #exit_util.exit_error(msg)

        # Record the CCP4 version we're running with  - also required in pyrvapi_results
        amoptd['ccp4_version'] = ample_util.ccp4_version()

        return os.environ['CCP4']
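
Every example on this page routes fatal errors through exit_util.exit_error. The AMPLE implementation is not shown here, so purely as an assumed sketch, a helper matching the call signature used throughout (a message plus an optional traceback) could look like this:

import logging
import sys
import traceback

logger = logging.getLogger(__name__)

def exit_error(msg, tb=None):
    # Hypothetical stand-in for exit_util.exit_error: log the fatal message
    # (plus any traceback supplied via sys.exc_info()[2]) and exit non-zero.
    logger.critical(msg)
    if tb is not None:
        logger.critical("".join(traceback.format_tb(tb)))
    sys.exit(1)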
Example 2
def process_rosetta_options(optd):
    # Create the rosetta modeller - this runs all the checks required
    rosetta_modeller = None
    if optd['make_models'] or optd['make_frags']:  # only need Rosetta if making models
        logger.info('Using ROSETTA so checking options')
        try:
            rosetta_modeller = rosetta_model.RosettaModel(optd=optd)
        except Exception as e:
            msg = "Error setting ROSETTA options: {0}".format(e)
            exit_util.exit_error(msg)
Example 3
def process_rosetta_options(optd):
    # Create the rosetta modeller - this runs all the checks required
    rosetta_modeller = None
    if optd['make_models'] or optd['make_frags']:  # only need Rosetta if making models
        logger.info('Using ROSETTA so checking options')
        try:
            rosetta_modeller = rosetta_model.RosettaModel(optd=optd)
        except Exception as e:
            msg = "Error setting ROSETTA options: {0}".format(e)
            exit_util.exit_error(msg)
        optd['modelling_workdir'] = rosetta_modeller.work_dir
    return rosetta_modeller
Example 4
def handle_model_import(amoptd, results):
    """Handle any errors flagged up by importing models and set any options based on the type of models."""
    error_msg = None
    if results.error:
        error_msg = "Error importing models: {}".format(results.error)
    elif results.homologs and not amoptd['homologs']:
        error_msg = "Imported models were not sequence identical, but homologs mode wasn't selected"
    if error_msg:
        exit_util.exit_error(error_msg)
    
    if results.single_ensemble and amoptd['webserver_uri']:
        logger.info("** Webserver mode got single NMR model so turning on NMR mode **")
        amoptd['nmr_model_in'] = amoptd['processed_models'][0]
    elif results.homologs and amoptd['webserver_uri']:
        logger.info("** Webserver mode got a directory of homologs so turning on Homolog mode **")
        amoptd['homologs'] = True
Example 5
    def modelling(self, optd, rosetta_modeller=None):
        if not (optd['make_frags'] or optd['make_models']
                or optd['nmr_remodel']):
            return
        # Set the directory where the final models will end up
        optd['models_dir'] = os.path.join(optd['work_dir'], 'models')
        if not os.path.isdir(optd['models_dir']):
            os.mkdir(optd['models_dir'])
        if not rosetta_modeller:
            rosetta_modeller = options_processor.process_rosetta_options(optd)
        # Make Rosetta fragments
        if optd['make_frags']:
            rosetta_modeller.generate_fragments(optd)
            optd['frags_3mers'] = rosetta_modeller.frags_3mers
            optd['frags_9mers'] = rosetta_modeller.frags_9mers
            optd['psipred_ss2'] = rosetta_modeller.psipred_ss2

        con_util = self.handle_contacts(optd)
        if optd['restraints_file']:
            rosetta_modeller.restraints_file = optd['restraints_file']

        if optd['make_models']:
            logger.info('----- making Rosetta models--------')
            logger.info('Making %s models...', optd['nmodels'])
            try:
                optd['processed_models'] = rosetta_modeller.ab_initio_model(
                    processed_models=optd['processed_models'])
            except Exception as e:
                msg = "Error running ROSETTA to create models: {0}".format(e)
                exit_util.exit_error(msg, sys.exc_info()[2])
            logger.info('Modelling complete - models stored in: %s',
                        optd['models_dir'])

        # Sub-select the decoys using contact information
        if con_util and optd['subselect_mode'] and not (optd['nmr_model_in'] or
                                                        optd['nmr_remodel']):
            logger.info(
                'Subselecting models from directory using provided contact information'
            )
            subselect_data = con_util.subselect_decoys(
                optd['processed_models'],
                'pdb',
                mode=optd['subselect_mode'],
                **optd)
            optd['processed_models'] = list(zip(*subselect_data))[0]
            optd['subselect_data'] = dict(subselect_data)
Example 6
def import_ensembles(amoptd):
    """Import ensembles using their file paths

    Parameters
    ----------
    amoptd : dict
       An AMPLE option dictionary
    
    Returns
    -------
    list
       A list of absolute files paths of the ensembles

    """
    if not pdb_edit.check_pdb_directory(amoptd['ensembles'], single=False):
        msg = "Cannot import ensembles from the directory: {0}".format(
            amoptd['ensembles'])
        exit_util.exit_error(msg)

    logger.info("Importing ensembles from directory: {0}".format(
        amoptd['ensembles']))

    ensembles = glob.glob(os.path.join(amoptd['ensembles'], '*.pdb'))
    amoptd['ensembles'] = ensembles

    # get the data on the ensemble
    ensembles_data = []
    for e in ensembles:
        d = {}
        d['name'] = os.path.splitext(os.path.basename(e))[0]
        d['ensemble_pdb'] = e

        # Get data on the models
        hierarchy = iotbx.pdb.pdb_input(file_name=e).construct_hierarchy()
        d['subcluster_num_models'] = len(hierarchy.models())
        d['num_residues'] = len(
            hierarchy.models()[0].chains()[0].residue_groups())
        d['ensemble_num_atoms'] = len(hierarchy.models()[0].atoms())

        ensembles_data.append(d)

    amoptd['ensembles_data'] = ensembles_data

    return ensembles
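
A minimal usage sketch for import_ensembles, assuming a hypothetical directory of ensemble PDB files (the path and printed keys are illustrative only):

# 'ensembles' starts as a directory path and is replaced by the list of PDB paths
amoptd = {'ensembles': '/path/to/ensemble_pdbs'}
ensemble_files = import_ensembles(amoptd)
for data in amoptd['ensembles_data']:
    print(data['name'], data['subcluster_num_models'], data['num_residues'])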
Example 7
def handle_model_import(amoptd, results):
    """Handle any errors flagged up by importing models and set any options based on the type of models."""
    error_msg = None
    if results.error:
        error_msg = "Error importing models: {}".format(results.error)
    elif results.homologs and not amoptd['homologs']:
        error_msg = "Imported models were not sequence identical, but homologs mode wasn't selected"
    if error_msg:
        exit_util.exit_error(error_msg)

    if results.single_ensemble and amoptd['webserver_uri']:
        logger.info(
            "** Webserver mode got single NMR model so turning on NMR mode **")
        amoptd['nmr_model_in'] = amoptd['processed_models'][0]
    elif results.homologs and amoptd['webserver_uri']:
        logger.info(
            "** Webserver mode got a directory of homologs so turning on Homolog mode **"
        )
        amoptd['homologs'] = True
Example 8
def import_ensembles(amoptd):
    """Import ensembles using their file paths

    Parameters
    ----------
    amoptd : dict
       An AMPLE option dictionary

    Returns
    -------
    list
       A list of absolute files paths of the ensembles

    """

    logger.info("Importing ensembles from directory: {0}".format(amoptd['ensembles']))

    ensembles = glob.glob(os.path.join(amoptd['ensembles'], '*.pdb'))
    if not len(ensembles):
        msg = "Cannot import ensembles from the directory: {0}".format(amoptd['ensembles'])
        exit_util.exit_error(msg)
    amoptd['ensembles'] = ensembles

    # get the data on the ensemble
    ensembles_data = []
    for e in ensembles:
        d = {}
        d['name'] = os.path.splitext(os.path.basename(e))[0]
        d['ensemble_pdb'] = e

        # Get data on the models
        hierarchy = iotbx.pdb.pdb_input(file_name=e).construct_hierarchy()
        d['subcluster_num_models'] = len(hierarchy.models())
        d['num_residues'] = len(hierarchy.models()[0].chains()[0].residue_groups())
        d['ensemble_num_atoms'] = len(hierarchy.models()[0].atoms())

        ensembles_data.append(d)

    amoptd['ensembles_data'] = ensembles_data

    return ensembles
Example 9
def process_options(optd):
    """Process the initial options from the command-line/ample.ini file to set any additional options.
    
    Description
    -----------
    This is where we take the options determining the type of run we are undertaking and set any additional 
    options required based on that runtype. All the major 
    """
    # Path for pickling results
    optd['results_path'] = os.path.join(optd['work_dir'], AMPLE_PKL)

    ###############################################################################
    #
    # FASTA processing
    #
    ###############################################################################
    # Check to see if mr_sequence was given and if not mr_sequence defaults to fasta
    if optd['mr_sequence'] is not None:
        if not os.path.exists(str(optd['mr_sequence'])):
            msg = 'Cannot find mr sequence file: {0}'.format(
                optd['mr_sequence'])
            exit_util.exit_error(msg)
    else:
        optd['mr_sequence'] = optd['fasta']

    # Process the fasta file and run all the checks on the sequence
    sequence_util.process_fasta(optd)

    #
    # Not sure if name actually required - see make_fragments.pl
    #
    if optd['name'] and len(optd['name']) != 4:
        msg = '-name argument is the wrong length, use 4 chars eg ABCD'
        exit_util.exit_error(msg)

    # Underscore required by rosetta make_fragments.pl
    optd['name'] += '_'

    ###############################################################################
    #
    # Contact file processing
    #
    ###############################################################################

    if optd['contact_file'] or optd['bbcontacts_file'] or not optd["no_contact_prediction"]:
        contact_util.ContactUtil.check_options(optd)
        optd['use_contacts'] = True
    ###############################################################################
    #
    # MTZ file processing
    #
    ###############################################################################
    try:
        mtz_util.processReflectionFile(optd)
    except Exception as e:
        msg = "Error processing reflection file: {0}".format(e)
        exit_util.exit_error(msg, sys.exc_info()[2])
Example 10
    def setup_workdir(self, argso):
        # Make a work directory - this way all output goes into this directory
        if argso['work_dir'] and not argso['restart_pkl']:
            print('Making a named work directory: %s' % argso['work_dir'])
            try:
                os.mkdir(argso['work_dir'])
            except Exception as e:
                msg = "Cannot create work_dir {0}: {1}".format(argso['work_dir'], e)
                exit_util.exit_error(msg, sys.exc_info()[2])

        if not argso['work_dir']:
            if not os.path.exists(argso['run_dir']):
                msg = 'Cannot find run directory: {0}'.format(argso['run_dir'])
                exit_util.exit_error(msg, sys.exc_info()[2])
            if bool(argso['rvapi_document']):
                # With JSCOFE we run in the run directory
                argso['work_dir'] = argso['run_dir']
            else:
                print('Making a run directory: checking for previous runs...')
                argso['work_dir'] = ample_util.make_workdir(argso['run_dir'],
                                                            ccp4i2=bool(argso['ccp4i2_xml']))
        os.chdir(argso['work_dir'])
        return argso['work_dir']
Example 11
    def modelling(self, optd, rosetta_modeller=None):
        if not (optd['make_frags'] or optd['make_models'] or optd['nmr_remodel']):
            return
        # Set the directory where the final models will end up
        optd['models_dir'] = os.path.join(optd['work_dir'], 'models')
        if not os.path.isdir(optd['models_dir']):
            os.mkdir(optd['models_dir'])
        if not rosetta_modeller:
            rosetta_modeller = options_processor.process_rosetta_options(optd)
        # Make Rosetta fragments
        if optd['make_frags']:
            rosetta_modeller.generate_fragments(optd)
            optd['frags_3mers'] = rosetta_modeller.frags_3mers
            optd['frags_9mers'] = rosetta_modeller.frags_9mers
            optd['psipred_ss2'] = rosetta_modeller.psipred_ss2

        con_util = self.handle_contacts(optd)
        if optd['restraints_file']:
            rosetta_modeller.restraints_file = optd['restraints_file']

        if optd['make_models']:
            logger.info('----- making Rosetta models--------')
            logger.info('Making %s models...', optd['nmodels'])
            try:
                optd['processed_models'] = rosetta_modeller.ab_initio_model(processed_models=optd['processed_models'])
            except Exception as e:
                msg = "Error running ROSETTA to create models: {0}".format(e)
                exit_util.exit_error(msg, sys.exc_info()[2])
            logger.info('Modelling complete - models stored in: %s', optd['models_dir'])

        # Sub-select the decoys using contact information
        if con_util and optd['subselect_mode'] and not (optd['nmr_model_in'] or optd['nmr_remodel']):
            logger.info('Subselecting models from directory using provided contact information')
            subselect_data = con_util.subselect_decoys(optd['processed_models'], 'pdb', mode=optd['subselect_mode'], **optd)
            optd['processed_models'] = list(zip(*subselect_data))[0]
            optd['subselect_data'] = dict(subselect_data)
Example 12
    def setup_workdir(self, argso):
        """Make a work directory - this way all output goes into this directory.

        This is done before the logger has been set up so no logging is possible.
        """
        if argso['work_dir'] and not argso['restart_pkl']:
            try:
                os.mkdir(argso['work_dir'])
            except Exception as e:
                msg = "Cannot create work_dir {0}: {1}".format(
                    argso['work_dir'], e)
                exit_util.exit_error(msg, sys.exc_info()[2])
        if not argso['work_dir']:
            if not os.path.exists(argso['run_dir']):
                msg = 'Cannot find run directory: {0}'.format(argso['run_dir'])
                exit_util.exit_error(msg, sys.exc_info()[2])
            if argso['rvapi_document']:
                # With JSCOFE we run in the run directory
                argso['work_dir'] = argso['run_dir']
            else:
                argso['work_dir'] = ample_util.make_workdir(
                    argso['run_dir'], ccp4i2=bool(argso['ccp4i2_xml']))
        os.chdir(argso['work_dir'])
        return argso['work_dir']
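
A usage sketch under assumed inputs; the dictionary holds only the keys setup_workdir reads, and the paths are hypothetical:

# 'ample' is assumed to be an instance of the class defining setup_workdir
argso = {
    'work_dir': '/tmp/ample_run',  # pre-named work directory to create
    'restart_pkl': None,
    'run_dir': '.',
    'rvapi_document': None,
    'ccp4i2_xml': None,
}
work_dir = ample.setup_workdir(argso)  # creates the directory and chdirs into it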
Example 13
def create_ensembles(amoptd):
    """Create the ensembles using the values in the amoptd dictionary
    
    Parameters
    ----------
    amoptd : dict
       An AMPLE option dictionary

    """
    # Create instance of the ensembler
    ensembler = ensembler_factory(amoptd)

    ############################################################################
    # For a single model we don't need to use glob
    if not (amoptd['single_model'] or amoptd['models']):
        msg = 'AMPLE ensembler needs either a single_model or a list of models'
        exit_util.exit_error(msg, sys.exc_info()[2])
    if amoptd['single_model'] and not os.path.isfile(amoptd['single_model']):
        msg = 'Cannot find single_model pdb: {0}'.format(amoptd['single_model'])
        exit_util.exit_error(msg, sys.exc_info()[2])
    elif amoptd['models'] and len(amoptd['models']) < 2:
        msg = 'Not enough models provided for ensembling - use single_model_mode instead'
        exit_util.exit_error(msg, sys.exc_info()[2])

    models = [amoptd['single_model']] if amoptd['single_model_mode'] else amoptd['models']

    #if amoptd['cluster_method'] == 'spicker_tmscore':
    #    models = reorder_models(models, amoptd['score_matrix_file_list'])
    #    if not (os.path.isfile(amoptd['score_matrix']) and os.path.isfile(amoptd['score_matrix_file_list'])):
    #        raise RuntimeError("spicker_tmscore needs a score_matrix and score_matrix_file_list")
    #    ensembler.score_matrix = amoptd['score_matrix']

    # Run ensemble creation
    ensembles = ensembler.generate_ensembles_from_amoptd(models, amoptd)

    ############################################################################
    # Hack to pull out the data - need to update code to work with ensemble objects rather than dictionaries
    amoptd['ensembles'] = [e.pdb for e in ensembles]
    amoptd['ensembles_data'] = [e.__dict__ for e in ensembles]

    # We need to let the main process know that we have succeeded as this module could be run on a cluster node with no link
    # to the parent process, so we create a file here indicating that we got this far and didn't die from an exception
    with open(amoptd['ensemble_ok'], 'w') as f:
        f.write('ok\n')

    # Delete all intermediate files if we're purging
    if amoptd['purge']:
        shutil.rmtree(ensembler.work_dir)
    return
Example 14
def create_ensembles(amoptd):
    """Create the ensembles using the values in the amoptd dictionary

    Parameters
    ----------
    amoptd : dict
       An AMPLE option dictionary

    """
    # Create instance of the ensembler
    ensembler = ensembler_factory(amoptd)

    ############################################################################
    # For a single model we don't need to use glob
    if not (amoptd['single_model'] or amoptd['processed_models']):
        msg = 'AMPLE ensembler needs either a single_model or a list of models'
        exit_util.exit_error(msg, sys.exc_info()[2])
    if amoptd['single_model'] and not os.path.isfile(amoptd['single_model']):
        msg = 'Cannot find single_model pdb: {0}'.format(amoptd['single_model'])
        exit_util.exit_error(msg, sys.exc_info()[2])
    elif amoptd['processed_models'] and len(amoptd['processed_models']) < 2:
        msg = 'Not enough models provided for ensembling - use single_model_mode instead'
        exit_util.exit_error(msg, sys.exc_info()[2])

    models = [amoptd['single_model']] if amoptd['single_model_mode'] else amoptd['processed_models']

    # Run ensemble creation
    ensembles = ensembler.generate_ensembles_from_amoptd(models, amoptd)

    ############################################################################
    # Hack to pull out the data - need to update code to work with ensemble objects rather than dictionaries
    amoptd['ensembles'] = [e.pdb for e in ensembles]
    amoptd['ensembles_data'] = [e.__dict__ for e in ensembles]
    amoptd['ensembles_workdir'] = ensembler.work_dir

    # We need to let the main process know that we have succeeded as this module could be run on a cluster node with no link
    # to the parent process, so we create a file here indicating that we got this far and didn't die from an exception
    with open(amoptd['ensemble_ok'], 'w') as fh:
        fh.write('ok\n')
    return
Example 15
def create_ensembles(amoptd):
    """Create the ensembles using the values in the amoptd dictionary

    Parameters
    ----------
    amoptd : dict
       An AMPLE option dictionary

    """
    # Create instance of the ensembler
    ensembler = ensembler_factory(amoptd)

    ############################################################################
    # For a single model we don't need to use glob
    if not (amoptd['single_model'] or amoptd['processed_models']):
        msg = 'AMPLE ensembler needs either a single_model or a list of models'
        exit_util.exit_error(msg, sys.exc_info()[2])
    if amoptd['single_model'] and not os.path.isfile(amoptd['single_model']):
        msg = 'Cannot find single_model pdb: {0}'.format(amoptd['single_model'])
        exit_util.exit_error(msg, sys.exc_info()[2])
    elif amoptd['processed_models'] and len(amoptd['processed_models']) < 2:
        msg = 'Not enough models provided for ensembling - use single_model_mode instead'
        exit_util.exit_error(msg, sys.exc_info()[2])

    models = [amoptd['single_model']] if amoptd['single_model_mode'] else amoptd['processed_models']

    # Run ensemble creation
    ensembles = ensembler.generate_ensembles_from_amoptd(models, amoptd)

    ############################################################################
    # Hack to pull out the data - need to update code to work with ensemble objects rather than dictionaries
    amoptd['ensembles'] = [e.pdb for e in ensembles]
    amoptd['ensembles_data'] = [e.__dict__ for e in ensembles]
    amoptd['ensembles_workdir'] = ensembler.work_dir

    # We need to let the main process know that we have succeeded as this module could be run on a cluster node with no link
    # to the parent process, so we create a file here indicating that we got this far and didn't die from an exception
    with open(amoptd['ensemble_ok'], 'w') as fh:
        fh.write('ok\n')
    return
Example 16
def extract_zip(filename, directory, suffixes=None):
    # zip file extraction
    logger.info('Extracting files from zipfile: %s', filename)
    if not zipfile.is_zipfile(filename):
        msg = 'File is not a valid zip archive: {0}'.format(filename)
        exit_util.exit_error(msg)
    zipf = zipfile.ZipFile(filename)
    zif = zipf.infolist()
    if not zif:
        msg = 'Empty zip file: {0}'.format(filename)
        exit_util.exit_error(msg)
    files = []
    for f in zif:
        if suffixes is None or os.path.splitext(f.filename)[1] in suffixes:
            # Hack to rewrite name
            f.filename = os.path.basename(f.filename)
            zipf.extract(f, path=directory)
            files.append(os.path.join(directory, f.filename))
    if not files:
        msg = 'Could not find any files with suffixes {0} in zipfile: {1}'.format(suffixes, filename)
        exit_util.exit_error(msg)
    return files
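
For instance, extracting only the PDB members of an archive (hypothetical paths):

# Pull just the .pdb files out of a zip of decoys into a models directory
pdb_files = extract_zip('/path/to/decoys.zip', '/path/to/models', suffixes=['.pdb'])
print('Extracted {0} models'.format(len(pdb_files)))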
Example 17
                    time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime()))
        logger.info("Invoked with command-line:\n%s\n", " ".join(sys.argv))
        logger.info("Running in directory: %s\n", optd['work_dir'])

        if pyrvapi_results.pyrvapi:
            self.ample_output = pyrvapi_results.AmpleOutput(optd)
            self.ample_output.display_results(optd)

        options_processor.check_mandatory_options(optd)

        optd = options_processor.process_restart_options(optd)
        if not optd['restart_pkl']:
            options_processor.process_options(optd)

        if optd['dry_run']:
            logger.info('Dry run finished checking options - cleaning up...')
            os.chdir(optd['run_dir'])
            shutil.rmtree(optd['work_dir'])
            sys.exit(0)

        logger.info('All needed programs are found, continuing...')
        return optd


if __name__ == "__main__":
    try:
        Ample().main()
    except Exception as e:
        msg = "Error running main AMPLE program: {0}".format(e.message)
        exit_util.exit_error(msg, sys.exc_info()[2])
Example 18
    ###############################################################################

    # Set default name for modelling directory
    optd['models_dir'] = os.path.join(optd['work_dir'], "models")

    # Check if importing ensembles
    if optd['ensembles']:
        # checks are made in ensembles.import_ensembles
        optd['import_ensembles'] = True
        optd['make_frags'] = False
        optd['make_models'] = False
    elif optd['cluster_dir']:
        if not os.path.isdir(optd['cluster_dir']):
            msg = "Import cluster cannot find directory: {0}".format(
                optd['cluster_dir'])
            exit_util.exit_error(msg)
        if not glob.glob(os.path.join(optd['cluster_dir'], "*.pdb")):
            msg = "Import cluster cannot find pdbs in directory: {0}".format(
                optd['cluster_dir'])
            exit_util.exit_error(msg)
        logger.info(
            "Importing pre-clustered models from directory: {0}\n".format(
                optd['cluster_dir']))
        optd['cluster_method'] = 'import'
        optd['make_frags'] = False
        optd['make_models'] = False
    elif optd['ideal_helices']:
        optd['make_frags'] = False
        optd['make_models'] = False
    elif optd['homologs']:
        optd['make_frags'] = False
Example 19
from ample.testing import run_tests

logger = logging_util.setup_console_logging(level=logging.INFO, formatstr='%(asctime)s - %(name)s - %(levelname)s - %(message)s')

#############################################################################
## Multiprocessing crashes on Windows when running multiple jobs.
#  Issue recorded 
#       1) https://docs.python.org/2/library/multiprocessing.html#windows
if sys.platform.startswith("win"):
    msg = """
*****************************************************************************
A bug prevents you from invoking our testing framework via the module loader. 
                                                                              
Please invoke using the following command:                                    
                                                                              
% ccp4-python {0}{1}run_tests.py <command> [<args>]                           
*****************************************************************************
"""
    msg = msg.format(os.path.dirname(__file__), os.sep)
    logger.critical(msg)
    sys.exit(1)

#############################################################################
## On Unix systems we can run as normal
try:
    run_tests.main()
except Exception as e:
    msg = "Error running Ample testsuite: {0}".format(e.message)
    logger.critical(msg)
    exit_util.exit_error(msg, sys.exc_info()[2])
Example 20
def process_restart_options(optd):
    """Process the restart options

    Description
    -----------
    For any new command-line options, we update the old dictionary with the new values.
    We then go through the new dictionary and set any of the flags corresponding to the data we find:
    
    if restart.pkl
    - if completed mrbump jobs
        make_frags, make_models, make_ensembles = False
        make_mr = True
      - if all jobs aren't completed, rerun the remaining mrbump jobs - IN THE OLD DIRECTORY?
      - if all jobs are completed and we are in benchmark mode run the benchmarking
        make_frags, make_models, make_ensembles, make_mr = False
        make_benchmark = True
      - END
    - if ensemble files
       - if no ensemble data, create ensemble data
       make_frags, make_models, make_ensembles = False
       make_mr = True
       - create and run the mrbump jobs - see above
       
       # Below all same as default
    - if models and no ensembles
      - create ensembles from the models
    
    FLAGS
    make_frags
    make_models
    make_ensembles
    make_mr
    make_benchmark
    
    Notes
    -----
    We return the dictionary as we may need to change it and it seems we can't change the external
    reference in this scope. I think?...

    """
    if not optd['restart_pkl']:
        return optd
    logger.info('Restarting from existing pkl file: {0}'.format(
        optd['restart_pkl']))

    # Go through and see what we need to do
    # Reset all variables for doing stuff - otherwise we will always restart from the earliest point
    optd['make_ensembles'] = False
    #optd['import_ensembles'] = False # Needs thinking about - have to set so we don't just reimport models/ensembles
    optd['import_models'] = False  # Needs thinking about
    optd['make_models'] = False
    optd['make_frags'] = False

    # First see if we should benchmark this job. The user may not have supplied a native_pdb with the original
    # job and we only set benchmark mode on seeing the native_pdb
    if optd['native_pdb']:
        if not os.path.isfile(optd['native_pdb']):
            msg = "Cannot find native_pdb: {0}".format(optd['native_pdb'])
            logger.critical(msg)
            raise RuntimeError(msg)
        optd['benchmark_mode'] = True
        logger.info('Restart using benchmark mode')

    # We always check first to see if there are any mrbump jobs
    optd['mrbump_scripts'] = []
    if 'mrbump_dir' in optd:
        optd['mrbump_scripts'] = mrbump_util.unfinished_scripts(optd)
        if not optd['mrbump_scripts']:
            optd['do_mr'] = False

    if optd['do_mr']:
        if len(optd['mrbump_scripts']):
            logger.info(
                'Restarting from unfinished mrbump scripts: {0}'.format(
                    optd['mrbump_scripts']))
            # Purge unfinished jobs
            for spath in optd['mrbump_scripts']:
                directory, script = os.path.split(spath)
                name, _ = os.path.splitext(script)
                # Hack to delete old job directories
                logfile = os.path.join(directory, name + '.log')
                if os.path.isfile(logfile):
                    os.unlink(logfile)
                jobdir = os.path.join(directory, 'search_' + name + '_mrbump')
                if os.path.isdir(jobdir):
                    shutil.rmtree(jobdir)
        elif 'ensembles' in optd and optd['ensembles'] and len(
                optd['ensembles']):
            # Rerun from ensembles - check for data/ensembles are ok?
            logger.info('Restarting from existing ensembles: {0}'.format(
                optd['ensembles']))
        elif 'models_dir' in optd and optd['models_dir'] and os.path.isdir(
                optd['models_dir']):
            logger.info('Restarting from existing models: {0}'.format(
                optd['models_dir']))
            # Check the models
            allsame = False if optd['homologs'] else True
            if not pdb_edit.check_pdb_directory(optd['models_dir'],
                                                sequence=None,
                                                single=True,
                                                allsame=allsame):
                msg = "Error importing restart models: {0}".format(
                    optd['models_dir'])
                exit_util.exit_error(msg)
            optd['make_ensembles'] = True
        elif optd['frags_3mers'] and optd['frags_9mers']:
            logger.info('Restarting from existing fragments: {0}, {1}'.format(
                optd['frags_3mers'], optd['frags_9mers']))
            optd['make_models'] = True

    return optd
Example 21
    def _exit(msg, wdir):
        exit_util.exit_error(msg)
Example 22
    def molecular_replacement(self, optd):

        if not optd['mrbump_scripts']:
            # MRBUMP analysis of the ensembles
            logger.info('----- Running MRBUMP on ensembles--------\n\n')
            if len(optd['ensembles']) < 1:
                msg = "ERROR! Cannot run MRBUMP as there are no ensembles!"
                exit_util.exit_error(msg)

            if optd['mrbump_dir'] is None:
                bump_dir = os.path.join(optd['work_dir'], 'MRBUMP')
                optd['mrbump_dir'] = bump_dir
            else:
                bump_dir = optd['mrbump_dir']
            if not os.path.exists(bump_dir):
                os.mkdir(bump_dir)

            optd['mrbump_results'] = []
            logger.info("Running MRBUMP jobs in directory: %s", bump_dir)

            # Set an ensemble-specific phaser_rms if required
            if optd['phaser_rms'] == 'auto':
                ensembler.set_phaser_rms_from_subcluster_score(optd)

            # Sort the ensembles in a favourable way
            logger.info("Sorting ensembles")
            sort_keys = [
                'cluster_num', 'truncation_level',
                'subcluster_radius_threshold', 'side_chain_treatment'
            ]
            ensemble_pdbs_sorted = ensembler.sort_ensembles(
                optd['ensembles'],
                optd['ensembles_data'],
                keys=sort_keys,
                prioritise=True)

            # Create job scripts
            logger.info("Generating MRBUMP runscripts")
            optd['mrbump_scripts'] = mrbump_util.write_mrbump_files(
                ensemble_pdbs_sorted,
                optd,
                job_time=mrbump_util.MRBUMP_RUNTIME,
                ensemble_options=optd['ensemble_options'],
                directory=bump_dir)

        # Create function for monitoring jobs - static function decorator?
        if self.ample_output:

            def monitor():
                r = mrbump_util.ResultsSummary()
                r.extractResults(optd['mrbump_dir'], purge=bool(optd['purge']))
                optd['mrbump_results'] = r.results
                return self.ample_output.display_results(optd)
        else:
            monitor = None

        # Save results here so that we have the list of scripts and mrbump directory set
        ample_util.save_amoptd(optd)

        # Change to mrbump directory before running
        os.chdir(optd['mrbump_dir'])
        ok = workers_util.run_scripts(
            job_scripts=optd['mrbump_scripts'],
            monitor=monitor,
            check_success=mrbump_util.checkSuccess,
            early_terminate=optd['early_terminate'],
            nproc=optd['nproc'],
            job_time=mrbump_util.MRBUMP_RUNTIME,
            job_name='mrbump',
            submit_cluster=optd['submit_cluster'],
            submit_qtype=optd['submit_qtype'],
            submit_queue=optd['submit_queue'],
            submit_pe_lsf=optd['submit_pe_lsf'],
            submit_pe_sge=optd['submit_pe_sge'],
            submit_array=optd['submit_array'],
            submit_max_array=optd['submit_max_array'])

        if not ok:
            msg = "An error code was returned after running MRBUMP on the ensembles!\n" + \
                  "For further information check the logs in directory: {0}".format(optd['mrbump_dir'])
            logger.critical(msg)

        # Collect the MRBUMP results
        results_summary = mrbump_util.ResultsSummary()
        optd['mrbump_results'] = results_summary.extractResults(
            optd['mrbump_dir'], purge=bool(optd['purge']))
        optd['success'] = results_summary.success
        ample_util.save_amoptd(optd)
        summary = mrbump_util.finalSummary(optd)
        logger.info(summary)
Example 23
def processReflectionFile(amoptd):
    """Make sure we have a valid mtz file. If necessary convert a given cif file.
       Set the mtz variable in the given amoptd to the reflection file to use
       Return True if it all worked or raise an exception if it failed
    """

    # We've been given a sf_cif so convert to mtz
    if amoptd['sf_cif']:
        if not os.path.isfile(amoptd['sf_cif']):
            msg = "Cannot find sf_cif file: {0}".format(amoptd['sf_cif'])
            exit_util.exit_error(msg)
        if not os.path.splitext(amoptd['sf_cif'])[1].lower() == ".cif":
            msg = "Cif file extension is not .cif Please rename the file to give it a .cif extension."
            exit_util.exit_error(msg)

        cp = cif_parser.CifParser()
        mtz = cp.sfcif2mtz(amoptd['sf_cif'])
        # See if reflections have been set aside for Rfree or if we need to calculate
        if not cp.hasRfree:
            logger.info(
                "sfcif2mtz: no valid RFREE data so removing FREE column added by mtz2cif"
            )
            amoptd['mtz'] = del_column(mtz, 'FREE')
        else:
            amoptd['mtz'] = mtz

    # Now have an mtz so check it's valid
    if not amoptd['mtz'] or not os.path.isfile(amoptd['mtz']):
        logger.critical("Cannot find MTZ file: %s", amoptd['mtz'])
        sys.exit(1)

    # Get column label info
    reflection_file = reflection_file_reader.any_reflection_file(
        file_name=amoptd['mtz'])
    if not reflection_file.file_type() == "ccp4_mtz":
        logger.critical("File is not of type ccp4_mtz: %s", amoptd['mtz'])
        sys.exit(1)

    # Read the file
    content = reflection_file.file_content()

    # Check any user-given flags
    for flag in ['F', 'SIGF', 'FREE']:
        if amoptd[flag] and amoptd[flag] not in content.column_labels():
            logger.critical("Cannot find flag %s label %s in mtz file %s",
                            flag, amoptd[flag], amoptd['mtz'])
            sys.exit(1)

    # If any of the flags aren't given we set defaults based on what's in the file
    if not amoptd['F']:
        if 'F' not in content.column_types():
            logger.critical(
                "Cannot find column type F for flag F in mtz file: %s",
                amoptd['mtz'])
            sys.exit(1)
        amoptd['F'] = content.column_labels()[content.column_types().index(
            'F')]
    if not amoptd['SIGF']:
        label = 'SIG' + amoptd['F']
        if label not in content.column_labels():
            logger.critical(
                "Cannot find column type %s for flag SIGF in mtz file: %s",
                label, amoptd['mtz'])
            sys.exit(1)
        amoptd['SIGF'] = label

    rfree = _get_rfree(content)
    if amoptd['FREE']:
        # Check is valid
        if not rfree or not rfree == amoptd['FREE']:
            logger.critical(
                "Given RFREE label %s is not valid for mtz file: %s",
                amoptd['FREE'], amoptd['mtz'])
            sys.exit(1)
    else:
        # See if we can find a valid label in the file
        if not rfree:
            # Need to generate RFREE
            logger.warning(
                "Cannot find a valid FREE flag - running uniquefy to generate column with RFREE data."
            )
            amoptd['mtz'] = add_rfree(amoptd['mtz'],
                                      directory=amoptd['work_dir'],
                                      overwrite=False)

            # Check file and get new FREE flag
            rfree = get_rfree(amoptd['mtz'])
            if not rfree:
                logger.critical(
                    "Cannot find valid rfree flag in mtz file %s after running uniquiefy",
                    amoptd['mtz'])
                sys.exit(1)
        amoptd['FREE'] = rfree

    # Output information to user and save to amoptd
    logger.info("Using MTZ file: %s", amoptd['mtz'])
    maxr, minr = content.max_min_resolution()
    amoptd['mtz_min_resolution'] = minr
    amoptd['mtz_max_resolution'] = maxr
    msg = "Resolution limits of MTZ file are: {0: > 6.3F} and {1: > 6.3F}".format(
        minr, maxr)
    logger.info(msg)

    return True
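
A sketch of the minimal option dictionary the function consumes, with keys taken from the code above and hypothetical file names:

# Hypothetical minimal amoptd for processReflectionFile
amoptd = {
    'sf_cif': None,            # or a path to a structure-factor CIF to convert first
    'mtz': 'reflections.mtz',  # reflection file to validate
    'F': None, 'SIGF': None, 'FREE': None,  # column labels; None = autodetect
    'work_dir': '.',
}
processReflectionFile(amoptd)
print(amoptd['F'], amoptd['SIGF'], amoptd['FREE'], amoptd['mtz_min_resolution'])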
Example 24
def process_modelling_options(optd):
    """ Modelling and ensemble options"""
    # Set default name for modelling directory
    optd['models_dir'] = os.path.join(optd['work_dir'], "models")
    # Check if importing ensembles
    if optd['ensembles']:
        # checks are made in ensembles.import_ensembles
        optd['import_ensembles'] = True
        optd['make_frags'] = False
        optd['make_models'] = False
    elif optd['cluster_dir']:
        if not os.path.isdir(optd['cluster_dir']):
            raise RuntimeError(
                "Import cluster cannot find directory: {0}".format(
                    optd['cluster_dir']))
        models = glob.glob(os.path.join(optd['cluster_dir'], "*.pdb"))
        if not models:
            raise RuntimeError(
                "Import cluster cannot find pdbs in directory: {0}".format(
                    optd['cluster_dir']))
        logger.info("Importing pre-clustered models from directory: %s\n",
                    optd['cluster_dir'])
        optd['cluster_method'] = 'import'
        optd['models'] = optd['cluster_dir']
        optd['make_frags'] = False
        optd['make_models'] = False
    elif optd['ideal_helices']:
        optd['make_frags'] = False
        optd['make_models'] = False
    elif optd['homologs']:
        optd['make_frags'] = False
        optd['make_models'] = False
        if not os.path.isfile(str(optd['alignment_file'])):
            # We need to use gesamt or mustang to do the alignment
            if optd['homolog_aligner'] == 'gesamt':
                if not ample_util.is_exe(str(optd['gesamt_exe'])):
                    optd['gesamt_exe'] = os.path.join(
                        os.environ['CCP4'], 'bin',
                        'gesamt' + ample_util.EXE_EXT)
                if not ample_util.is_exe(str(optd['gesamt_exe'])):
                    raise RuntimeError(
                        'Using homologs without an alignment file and cannot find gesamt_exe: {0}'
                        .format(optd['gesamt_exe']))
            elif optd['homolog_aligner'] == 'mustang':
                if not ample_util.is_exe(str(optd['mustang_exe'])):
                    raise RuntimeError(
                        'Using homologs without an alignment file and cannot find mustang_exe: {0}'
                        .format(optd['mustang_exe']))
            else:
                raise RuntimeError('Unknown homolog_aligner: {0}'.format(
                    optd['homolog_aligner']))
        if not os.path.isdir(str(optd['models'])):
            raise RuntimeError(
                "Homologs option requires a directory of pdb models to be supplied\n"
                + "Please supply the models with the -models flag")
        optd['import_models'] = True
    elif optd['models']:
        if not os.path.exists(optd['models']):
            raise RuntimeError("Cannot find -models path: {}".format(
                optd['models']))
        optd['import_models'] = True
        optd['make_frags'] = False
        optd['make_models'] = False
    elif optd['single_model']:
        optd['cluster_method'] = "skip"
        optd['make_frags'] = False
        optd['make_models'] = False
        optd['single_model_mode'] = True
    # Check import flags
    if optd['import_ensembles'] and (optd['import_models']):
        raise RuntimeError("Cannot import both models and ensembles/clusters!")
    # NMR Checks
    if optd['nmr_model_in']:
        logger.info("Using nmr_model_in file: %s", optd['nmr_model_in'])
        if not os.path.isfile(optd['nmr_model_in']):
            msg = "nmr_model_in flag given, but cannot find file: {0}".format(
                optd['nmr_model_in'])
            exit_util.exit_error(msg)
        if optd['nmr_remodel']:
            optd['make_models'] = True
            if optd['nmr_remodel_fasta']:
                if not os.path.isfile(optd['nmr_remodel_fasta']):
                    raise RuntimeError(
                        "Cannot find nmr_remodel_fasta file: {0}".format(
                            optd['nmr_remodel_fasta']))
            else:
                optd['nmr_remodel_fasta'] = optd['fasta']
            msg = "NMR model will be remodelled with ROSETTA using the sequence from: {0}".format(
                optd['nmr_remodel_fasta'])
            logger.info(msg)
            if not (optd['frags_3mers'] and optd['frags_9mers']):
                optd['make_frags'] = True
                msg = "nmr_remodel - will be making our own fragment files"
                logger.info(msg)
            else:
                if not (os.path.isfile(optd['frags_3mers'])
                        and os.path.isfile(optd['frags_9mers'])):
                    raise RuntimeError(
                        "frags_3mers and frags_9mers files given, but cannot locate them:\n{0}\n{1}\n"
                        .format(optd['frags_3mers'], optd['frags_9mers']))
                optd['make_frags'] = False
        else:
            optd['make_frags'] = False
            optd['make_models'] = False
            msg = "Running in NMR truncate only mode"
            logger.info(msg)
    elif optd['make_models']:
        if not os.path.isdir(optd['models_dir']):
            os.mkdir(optd['models_dir'])
        # If the user has given both fragment files we check they are ok and unset make_frags
        if optd['frags_3mers'] and optd['frags_9mers']:
            if not os.path.isfile(optd['frags_3mers']) or not os.path.isfile(
                    optd['frags_9mers']):
                raise RuntimeError(
                    "frags_3mers and frags_9mers files given, but cannot locate them:\n{0}\n{1}\n"
                    .format(optd['frags_3mers'], optd['frags_9mers']))
            optd['make_frags'] = False
        if optd['make_frags'] and (optd['frags_3mers'] or optd['frags_9mers']):
            raise RuntimeError(
                "make_frags set to true, but you have given the path to the frags_3mers or frags_9mers"
            )
        if not optd['make_frags'] and not (optd['frags_3mers']
                                           and optd['frags_9mers']):
            msg = """*** Missing fragment files! ***
    Please supply the paths to the fragment files using the -frags_3mers and -frags_9mers flags.
    These can be generated using the Robetta server: http://robetta.bakerlab.org
    Please see the AMPLE documentation for further information."""
            raise RuntimeError(msg)
    if optd['make_frags']:
        if optd['use_homs']:
            logger.info('Making fragments (including homologues)')
        else:
            logger.info('Making fragments EXCLUDING HOMOLOGUES')
    else:
        logger.info('NOT making Fragments')
    if optd['make_models']:
        logger.info('\nMaking Rosetta Models')
    else:
        logger.info('NOT making Rosetta Models')
Example 25
    def molecular_replacement(self, optd):
        mrbump_util.set_success_criteria(optd)
        if not optd['mrbump_scripts']:
            # MRBUMP analysis of the ensembles
            logger.info('----- Running MRBUMP on ensembles--------\n\n')
            if len(optd['ensembles']) < 1:
                msg = "ERROR! Cannot run MRBUMP as there are no ensembles!"
                exit_util.exit_error(msg)

            if optd['mrbump_dir'] is None:
                bump_dir = os.path.join(optd['work_dir'], 'MRBUMP')
                optd['mrbump_dir'] = bump_dir
            else:
                bump_dir = optd['mrbump_dir']
            if not os.path.exists(bump_dir):
                os.mkdir(bump_dir)

            optd['mrbump_results'] = []
            logger.info("Running MRBUMP jobs in directory: %s", bump_dir)

            # Set an ensemble-specific phaser_rms if required
            if optd['phaser_rms'] == 'auto':
                ensembler.set_phaser_rms_from_subcluster_score(optd)

            # Sort the ensembles in a favourable way
            logger.info("Sorting ensembles")
            sort_keys = ['cluster_num', 'truncation_level', 'subcluster_radius_threshold', 'side_chain_treatment']
            ensemble_pdbs_sorted = ensembler.sort_ensembles(
                optd['ensembles'], optd['ensembles_data'], keys=sort_keys, prioritise=True)

            # Create job scripts
            logger.info("Generating MRBUMP runscripts")
            optd['mrbump_scripts'] = mrbump_util.write_mrbump_files(
                ensemble_pdbs_sorted,
                optd,
                job_time=mrbump_util.MRBUMP_RUNTIME,
                ensemble_options=optd['ensemble_options'],
                directory=bump_dir)

        # Create function for monitoring jobs - static function decorator?
        if self.ample_output:
            def monitor():
                r = mrbump_util.ResultsSummary()
                r.extractResults(optd['mrbump_dir'], purge=bool(optd['purge']))
                optd['mrbump_results'] = r.results
                return self.ample_output.display_results(optd)
        else:
            monitor = None

        # Save results here so that we have the list of scripts and mrbump directory set
        ample_util.save_amoptd(optd)

        # Change to mrbump directory before running
        os.chdir(optd['mrbump_dir'])
        ok = workers_util.run_scripts(
            job_scripts=optd['mrbump_scripts'],
            monitor=monitor,
            check_success=mrbump_util.checkSuccess,
            early_terminate=optd['early_terminate'],
            nproc=optd['nproc'],
            job_time=mrbump_util.MRBUMP_RUNTIME,
            job_name='mrbump',
            submit_cluster=optd['submit_cluster'],
            submit_qtype=optd['submit_qtype'],
            submit_queue=optd['submit_queue'],
            submit_pe_lsf=optd['submit_pe_lsf'],
            submit_pe_sge=optd['submit_pe_sge'],
            submit_array=optd['submit_array'],
            submit_max_array=optd['submit_max_array'])

        if not ok:
            msg = "An error code was returned after running MRBUMP on the ensembles!\n" + \
                  "For further information check the logs in directory: {0}".format(optd['mrbump_dir'])
            logger.critical(msg)

        # Collect the MRBUMP results
        results_summary = mrbump_util.ResultsSummary()
        optd['mrbump_results'] = results_summary.extractResults(optd['mrbump_dir'], purge=bool(optd['purge']))
        optd['success'] = results_summary.success
        ample_util.save_amoptd(optd)
        summary = mrbump_util.finalSummary(optd)
        logger.info(summary)
Example 26
def process_mr_options(optd):
    # Molecular Replacement Options
    if optd['molrep_only']:
        optd['phaser_only'] = False
    if optd['molrep_only']:
        optd['mrbump_programs'] = ['molrep']
    elif optd['phaser_only']:
        optd['mrbump_programs'] = ['phaser']
    else:
        optd['mrbump_programs'] = ['molrep', 'phaser']
    if optd['phaser_rms'] != 'auto':
        try:
            phaser_rms = float(optd['phaser_rms'])
        except ValueError as e:
            msg = "Error converting phaser_rms '{0}' to floating point: {1}".format(optd['phaser_rms'], e)
            exit_util.exit_error(msg)
        else:
            optd['phaser_rms'] = phaser_rms

    # Disable all rebuilding if the resolution is too poor
    if optd['mtz_min_resolution'] >= mrbump_util.REBUILD_MAX_PERMITTED_RESOLUTION:
        logger.warn("!!! Disabling all rebuilding as maximum resolution of %f is too poor!!!", optd['mtz_min_resolution'])
        optd['use_shelxe'] = False
        optd['shelxe_rebuild'] = False
        optd['shelxe_rebuild_arpwarp'] = False
        optd['shelxe_rebuild_buccaneer'] = False
        optd['refine_rebuild_arpwarp'] = False
        optd['refine_rebuild_buccaneer'] = False
        
    # We use shelxe by default so if we can't find it we just warn and set use_shelxe to False
    if optd['use_shelxe']:
        if optd['mtz_min_resolution'] > mrbump_util.SHELXE_MAX_PERMITTED_RESOLUTION:
            logger.warn("Disabling use of SHELXE as min resolution of %f is < accepted limit of %f",
                        optd['mtz_min_resolution'],
                        mrbump_util.SHELXE_MAX_PERMITTED_RESOLUTION)
            optd['use_shelxe'] = False
            optd['shelxe_rebuild'] = False
    if optd['use_shelxe']:
        if not optd['shelxe_exe']:
            optd['shelxe_exe'] = os.path.join(os.environ['CCP4'], 'bin', 'shelxe' + ample_util.EXE_EXT)
        try:
            optd['shelxe_exe'] = ample_util.find_exe(optd['shelxe_exe'])
        except ample_util.FileNotFoundError:
            msg = """*** Cannot find shelxe executable in PATH - turning off use of SHELXE. ***
    SHELXE is recommended for the best chance of success. We recommend you install shelxe from:
    http://shelx.uni-ac.gwdg.de/SHELX/
    and install it in your PATH so that AMPLE can use it.
    """
            logger.warn(msg)
            optd['use_shelxe'] = False
    if optd['shelxe_rebuild']:
        optd['shelxe_rebuild_arpwarp'] = True
        optd['shelxe_rebuild_buccaneer'] = True
        
    # If shelxe_rebuild is set we need use_shelxe to be set
    if (optd['shelxe_rebuild'] or optd['shelxe_rebuild_arpwarp']  or optd['shelxe_rebuild_buccaneer']) and not optd['use_shelxe']:
        raise RuntimeError('shelxe_rebuild is set but use_shelxe is False. Please make sure you have shelxe installed.')

    if optd['refine_rebuild_arpwarp'] or optd['shelxe_rebuild_arpwarp']:
        auto_tracing_sh = None
        if 'warpbin' in os.environ:
            _path = os.path.join(os.environ['warpbin'], "auto_tracing.sh")
            if os.path.isfile(_path):
                auto_tracing_sh = _path
        if auto_tracing_sh:
            logger.info('Using arpwarp script: %s', auto_tracing_sh)
        else:
            logger.warn('Cannot find arpwarp script! Disabling use of arpwarp.')
            optd['refine_rebuild_arpwarp'] = False
            optd['shelxe_rebuild_arpwarp'] = False

    if optd['refine_rebuild_arpwarp'] or optd['shelxe_rebuild_arpwarp']:
        logger.info('Rebuilding in ARP/wARP')
    else:
        logger.info('Not rebuilding in ARP/wARP')

    if optd['refine_rebuild_buccaneer'] or optd['shelxe_rebuild_buccaneer']:
        logger.info('Rebuilding in Buccaneer')
    else:
        logger.info('Not rebuilding in Buccaneer')
Example 27
def process_mr_options(optd):
    # Molecular Replacement Options
    if optd['molrep_only']:
        optd['phaser_only'] = False
    if optd['molrep_only']:
        optd['mrbump_programs'] = ['molrep']
    elif optd['phaser_only']:
        optd['mrbump_programs'] = ['phaser']
    else:
        optd['mrbump_programs'] = ['molrep', 'phaser']
    if optd['phaser_rms'] != 'auto':
        try:
            phaser_rms = float(optd['phaser_rms'])
        except ValueError as e:
            msg = "Error converting phaser_rms '{0}' to floating point: {1}".format(
                optd['phaser_rms'], e)
            exit_util.exit_error(msg)
        else:
            optd['phaser_rms'] = phaser_rms

    # Disable all rebuilding if the resolution is too poor
    if optd['mtz_min_resolution'] >= mrbump_util.REBUILD_MAX_PERMITTED_RESOLUTION:
        logger.warn(
            "!!! Disabling all rebuilding as maximum resolution of %f is too poor!!!",
            optd['mtz_min_resolution'])
        optd['use_shelxe'] = False
        optd['shelxe_rebuild'] = False
        optd['shelxe_rebuild_arpwarp'] = False
        optd['shelxe_rebuild_buccaneer'] = False
        optd['refine_rebuild_arpwarp'] = False
        optd['refine_rebuild_buccaneer'] = False

    if optd['shelxe_max_resolution'] < 0.0:
        if optd['coiled_coil']:
            optd['shelxe_max_resolution'] = mrbump_util.SHELXE_MAX_PERMITTED_RESOLUTION_CC
        else:
            optd['shelxe_max_resolution'] = mrbump_util.SHELXE_MAX_PERMITTED_RESOLUTION

    # We use shelxe by default so if we can't find it we just warn and set use_shelxe to False
    if optd['use_shelxe']:
        if optd['mtz_min_resolution'] > optd['shelxe_max_resolution']:
            logger.warn(
                "Disabling use of SHELXE as min resolution of %f is < accepted limit of %f",
                optd['mtz_min_resolution'],
                optd['shelxe_max_resolution'],
            )
            optd['use_shelxe'] = False
            optd['shelxe_rebuild'] = False
    if optd['use_shelxe']:
        if not optd['shelxe_exe']:
            optd['shelxe_exe'] = os.path.join(os.environ['CCP4'], 'bin',
                                              'shelxe' + ample_util.EXE_EXT)
        try:
            optd['shelxe_exe'] = ample_util.find_exe(optd['shelxe_exe'])
        except ample_util.FileNotFoundError:
            msg = """*** Cannot find shelxe executable in PATH - turning off use of SHELXE. ***
    SHELXE is recommended for the best chance of success. We recommend you install shelxe from:
    http://shelx.uni-ac.gwdg.de/SHELX/
    and install it in your PATH so that AMPLE can use it.
    """
            logger.warn(msg)
            optd['use_shelxe'] = False
    if optd['shelxe_rebuild']:
        optd['shelxe_rebuild_arpwarp'] = True
        optd['shelxe_rebuild_buccaneer'] = True

    # If shelxe_rebuild is set we need use_shelxe to be set
    if (optd['shelxe_rebuild'] or optd['shelxe_rebuild_arpwarp']
            or optd['shelxe_rebuild_buccaneer']) and not optd['use_shelxe']:
        raise RuntimeError(
            'shelxe_rebuild is set but use_shelxe is False. Please make sure you have shelxe installed.'
        )

    if optd['refine_rebuild_arpwarp'] or optd['shelxe_rebuild_arpwarp']:
        auto_tracing_sh = None
        if 'warpbin' in os.environ:
            _path = os.path.join(os.environ['warpbin'], "auto_tracing.sh")
            if os.path.isfile(_path):
                auto_tracing_sh = _path
        if auto_tracing_sh:
            logger.info('Using arpwarp script: %s', auto_tracing_sh)
        else:
            logger.warning('Cannot find arpwarp script! Disabling use of arpwarp.')
            optd['refine_rebuild_arpwarp'] = False
            optd['shelxe_rebuild_arpwarp'] = False

    if optd['refine_rebuild_arpwarp'] or optd['shelxe_rebuild_arpwarp']:
        logger.info('Rebuilding in ARP/wARP')
    else:
        logger.info('Not rebuilding in ARP/wARP')

    if optd['refine_rebuild_buccaneer'] or optd['shelxe_rebuild_buccaneer']:
        logger.info('Rebuilding in Buccaneer')
    else:
        logger.info('Not rebuilding in Buccaneer')
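
A note on the program-selection cascade above: molrep_only takes precedence over phaser_only, and both flags funnel into the single mrbump_programs list. A minimal standalone sketch of that precedence (the helper name is hypothetical, not part of AMPLE):

def select_mrbump_programs(molrep_only=False, phaser_only=False):
    # molrep_only wins over phaser_only, mirroring the cascade above
    if molrep_only:
        return ['molrep']
    if phaser_only:
        return ['phaser']
    return ['molrep', 'phaser']

assert select_mrbump_programs(molrep_only=True, phaser_only=True) == ['molrep']
assert select_mrbump_programs() == ['molrep', 'phaser']
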
Example n. 28
0
def process_modelling_options(optd):
    """ Modelling and ensemble options"""
    # Set default name for modelling directory
    optd['models_dir'] = os.path.join(optd['work_dir'], "models")
    # Check if importing ensembles
    if optd['ensembles']:
        # checks are made in ensembles.import_ensembles
        optd['import_ensembles'] = True
        optd['make_frags'] = False
        optd['make_models'] = False
    elif optd['cluster_dir']:
        if not os.path.isdir(optd['cluster_dir']):
            raise RuntimeError("Import cluster cannot find directory: {0}".format(optd['cluster_dir']))
        models = glob.glob(os.path.join(optd['cluster_dir'], "*.pdb"))
        if not models:
            raise RuntimeError("Import cluster cannot find pdbs in directory: {0}".format(optd['cluster_dir']))
        logger.info("Importing pre-clustered models from directory: %s\n", optd['cluster_dir'])
        optd['cluster_method'] = 'import'
        optd['models'] = optd['cluster_dir']
        optd['make_frags'] = False
        optd['make_models'] = False
    elif optd['ideal_helices']:
        optd['make_frags'] = False
        optd['make_models'] = False
    elif optd['homologs']:
        optd['make_frags'] = False
        optd['make_models'] = False
        if not os.path.isfile(str(optd['alignment_file'])):
            # We need to use gesamt or mustang to do the alignment
            if optd['homolog_aligner'] == 'gesamt':
                if not ample_util.is_exe(str(optd['gesamt_exe'])):
                    optd['gesamt_exe'] = os.path.join(os.environ['CCP4'], 'bin', 'gesamt' + ample_util.EXE_EXT)
                if not ample_util.is_exe(str(optd['gesamt_exe'])):
                    raise RuntimeError('Using homologs without an alignment file and cannot find gesamt_exe: {0}'.format(
                        optd['gesamt_exe']))
            elif optd['homolog_aligner'] == 'mustang':
                if not ample_util.is_exe(str(optd['mustang_exe'])):
                    raise RuntimeError('Using homologs without an alignment file and cannot find mustang_exe: {0}'.format(
                        optd['mustang_exe']))
            else:
                raise RuntimeError('Unknown homolog_aligner: {0}'.format(optd['homolog_aligner']))
        if not os.path.isdir(str(optd['models'])):
            raise RuntimeError("Homologs option requires a directory of pdb models to be supplied\n" + \
                              "Please supply the models with the -models flag")
        optd['import_models'] = True
    elif optd['models']:
        if not os.path.exists(optd['models']):
            raise RuntimeError("Cannot find -models path: {}".format(optd['models']))
        optd['import_models'] = True
        optd['make_frags'] = False
        optd['make_models'] = False
    elif optd['single_model']:
        optd['cluster_method'] = "skip"
        optd['make_frags'] = False
        optd['make_models'] = False
        optd['single_model_mode'] = True
    # Check import flags
    if optd['import_ensembles'] and optd['import_models']:
        raise RuntimeError("Cannot import both models and ensembles/clusters!")
    # NMR Checks
    if optd['nmr_model_in']:
        logger.info("Using nmr_model_in file: %s", optd['nmr_model_in'])
        if not os.path.isfile(optd['nmr_model_in']):
            msg = "nmr_model_in flag given, but cannot find file: {0}".format(optd['nmr_model_in'])
            exit_util.exit_error(msg)
        if optd['nmr_remodel']:
            optd['make_models'] = True
            if optd['nmr_remodel_fasta']:
                if not os.path.isfile(optd['nmr_remodel_fasta']):
                    raise RuntimeError("Cannot find nmr_remodel_fasta file: {0}".format(optd['nmr_remodel_fasta']))
            else:
                optd['nmr_remodel_fasta'] = optd['fasta']
            msg = "NMR model will be remodelled with ROSETTA using the sequence from: {0}".format(
                optd['nmr_remodel_fasta'])
            logger.info(msg)
            if not (optd['frags_3mers'] and optd['frags_9mers']):
                optd['make_frags'] = True
                msg = "nmr_remodel - will be making our own fragment files"
                logger.info(msg)
            else:
                if not (os.path.isfile(optd['frags_3mers']) and os.path.isfile(optd['frags_9mers'])):
                    raise RuntimeError("frags_3mers and frag_9mers files given, but cannot locate them:\n{0}\n{1}\n".format(
                        optd['frags_3mers'], optd['frags_9mers']))
                optd['make_frags'] = False
        else:
            optd['make_frags'] = False
            optd['make_models'] = False
            msg = "Running in NMR truncate only mode"
            logger.info(msg)
    elif optd['make_models']:
        if not os.path.isdir(optd['models_dir']):
            os.mkdir(optd['models_dir'])
        # If the user has given both fragment files we check they are ok and unset make_frags
        if optd['frags_3mers'] and optd['frags_9mers']:
            if not os.path.isfile(optd['frags_3mers']) or not os.path.isfile(optd['frags_9mers']):
                raise RuntimeError("frags_3mers and frag_9mers files given, but cannot locate them:\n{0}\n{1}\n".format(
                    optd['frags_3mers'], optd['frags_9mers']))
            optd['make_frags'] = False
        if optd['make_frags'] and (optd['frags_3mers'] or optd['frags_9mers']):
            raise RuntimeError("make_frags set to true, but you have given the path to the frags_3mers or frags_9mers")
        if not optd['make_frags'] and not (optd['frags_3mers'] and optd['frags_9mers']):
            msg = """*** Missing fragment files! ***
    Please supply the paths to the fragment files using the -frags_3mers and -frags_9mers flags.
    These can be generated using the Robetta server: http://robetta.bakerlab.org
    Please see the AMPLE documentation for further information."""
            raise RuntimeError(msg)
    if optd['make_frags']:
        if optd['use_homs']:
            logger.info('Making fragments (including homologues)')
        else:
            logger.info('Making fragments EXCLUDING HOMOLOGUES')
    else:
        logger.info('NOT making Fragments')
    if optd['make_models']:
        logger.info('\nMaking Rosetta Models')
    else:
        logger.info('NOT making Rosetta Models')
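
process_modelling_options resolves exactly one input mode per run, in the order of the elif chain above. A condensed sketch of that precedence (the helper and the returned mode names are hypothetical, for illustration only):

def modelling_mode(optd):
    # First truthy key wins, mirroring the elif chain above
    for key, mode in (('ensembles', 'import_ensembles'),
                      ('cluster_dir', 'import_cluster'),
                      ('ideal_helices', 'ideal_helices'),
                      ('homologs', 'homologs'),
                      ('models', 'import_models'),
                      ('single_model', 'single_model')):
        if optd.get(key):
            return mode
    return 'make_models'  # fall through to ab initio modelling

assert modelling_mode({'models': '/path/to/models'}) == 'import_models'
assert modelling_mode({}) == 'make_models'
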
Example n. 29
0
def process_fasta(amoptd, canonicalise=False):
    # Check we can find the input fasta
    if not os.path.exists(str(amoptd['fasta'])):
        msg = 'Cannot find fasta file: {0}'.format(amoptd['fasta'])
        exit_util.exit_error(msg)

    # Reformat to what we need
    logger.debug('Parsing FASTA file')
    try:
        fp = Sequence(fasta=amoptd['fasta'], canonicalise=canonicalise)
    except Exception as e:
        msg = "Error parsing FASTA file: {0}\n\n{1}".format(
            amoptd['fasta'], e.message)
        exit_util.exit_error(msg)
    if fp.numSequences() != 1:
        msg = "ERROR! Fasta file {0} has > 1 sequence in it.".format(
            amoptd['fasta'])
        exit_util.exit_error(msg)

    # Length checks
    amoptd['fasta_length'] = fp.length()
    logging.info("Fasta is {0} amino acids long".format(
        amoptd['fasta_length']))

    # Check we have a decent length
    if amoptd['fasta_length'] < 9:
        msg = "ERROR! Fasta is of length {0}. This is much too short!".format(
            amoptd['fasta_length'])
        exit_util.exit_error(msg)

    # Check we will be able to truncate at this level
    if (float(amoptd['fasta_length']) / 100) * float(amoptd['percent']) < 1:
        msg = "Cannot truncate a fasta sequence of length {0} with {1} percent intervals. Please select a larger interval.".format(
            amoptd['fasta_length'], amoptd['percent'])
        exit_util.exit_error(msg)

    # Check that the sequence doesn't have a his-tag in it
    if not amoptd['allow_his_tag']:
        his_tag = 'HHHHHH'
        i = fp.sequence().find(his_tag)
        seq_len = fp.length()
        # str.find returns -1 when the tag is absent, so guard against a false match
        if i != -1 and ((0 <= i <= 20) or (seq_len - 20 <= i <= seq_len)):
            msg = 'The fasta sequence contains a his tag sequence {0} at position {1}. If you wish to use ample with this sequence, please use the "-allow_his_tag True" option'.format(
                his_tag, i)
            exit_util.exit_error(msg)

    # Fasta is ok, so write out a canonical fasta in the work directory
    outfasta = os.path.join(amoptd['work_dir'], amoptd['name'] + '_.fasta')
    fp.write_fasta(outfasta)
    amoptd['fasta'] = outfasta
    amoptd['sequence'] = fp.sequence()

    return
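
The truncation check above requires that each percent step removes at least one residue, i.e. fasta_length / 100 * percent >= 1. The same arithmetic as a quick standalone check (hypothetical helper, illustrative values):

def can_truncate(fasta_length, percent):
    # Each truncation interval must span at least one residue
    return (float(fasta_length) / 100.0) * float(percent) >= 1.0

assert can_truncate(59, 5)        # 2.95 residues per 5% step - OK
assert not can_truncate(15, 5)    # 0.75 residues per 5% step - too short
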
Example n. 30
0
def find_maxcluster(amoptd):
    """Return path to maxcluster binary.
    If we can't find one in the path, we create a $HOME/.ample
    directory and downlod it to there
    """

    if amoptd['maxcluster_exe'] and ample_util.is_exe(
            amoptd['maxcluster_exe']):
        return amoptd['maxcluster_exe']

    if not amoptd['maxcluster_exe']:
        if sys.platform.startswith("win"):
            amoptd['maxcluster_exe'] = 'maxcluster.exe'
        else:
            amoptd['maxcluster_exe'] = 'maxcluster'

    try:
        maxcluster_exe = ample_util.find_exe(amoptd['maxcluster_exe'],
                                             dirs=[amoptd['rcdir']])
    except ample_util.FileNotFoundError:
        # Cannot find so we need to try and download it
        rcdir = amoptd['rcdir']
        logger.info(
            "Cannot find maxcluster binary in path so attempting to download it directory: {0}"
            .format(rcdir))
        if not os.path.isdir(rcdir):
            logger.info(
                "No ample rcdir found so creating in: {0}".format(rcdir))
            os.mkdir(rcdir)
        url = None
        maxcluster_exe = os.path.join(rcdir, 'maxcluster')
        if sys.platform.startswith("linux"):
            bit = platform.architecture()[0]
            if bit == '64bit':
                url = 'http://www.sbg.bio.ic.ac.uk/~maxcluster/maxcluster64bit'
            elif bit == '32bit':
                url = 'http://www.sbg.bio.ic.ac.uk/~maxcluster/maxcluster'
            else:
                msg = "Unrecognised system type: {0} {1}".format(
                    sys.platform, bit)
                exit_util.exit_error(msg)
        elif sys.platform.startswith("darwin"):
            url = 'http://www.sbg.bio.ic.ac.uk/~maxcluster/maxcluster_i686_32bit.bin'
            #OSX PPC: http://www.sbg.bio.ic.ac.uk/~maxcluster/maxcluster_PPC_32bit.bin
        elif sys.platform.startswith("win"):
            url = 'http://www.sbg.bio.ic.ac.uk/~maxcluster/maxcluster.exe'
            maxcluster_exe = os.path.join(rcdir, 'maxcluster.exe')
        else:
            msg = "Unrecognised system type: {0}".format(sys.platform)
            exit_util.exit_error(msg)
        logger.info(
            "Attempting to download maxcluster binary from: {0}".format(url))
        try:
            urllib.urlretrieve(url, maxcluster_exe)
        except Exception as e:
            msg = "Error downloading maxcluster executable: {0}\n{1}".format(
                url, e)
            exit_util.exit_error(msg)

        # make executable
        os.chmod(maxcluster_exe, 0o777)
    return maxcluster_exe
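
Note that the download above uses the Python 2 urllib API. If this helper needs to run under Python 3 as well, a guarded import keeps the call site unchanged (a compatibility sketch, not part of the original):

try:
    from urllib import urlretrieve  # Python 2
except ImportError:
    from urllib.request import urlretrieve  # Python 3

# The download then becomes: urlretrieve(url, maxcluster_exe)
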
Example n. 31
0
    def ensembling(self, optd):
        if optd['import_ensembles']:
            ensembler.import_ensembles(optd)
        elif optd['ideal_helices']:
            ample_util.ideal_helices(optd)
            logger.info("*** Using ideal helices to solve structure ***")
        else:
            # Import the models here instead of cluster_util.
            if optd['cluster_method'] == 'import':
                # HACK - this is certainly not how we want to do it. One flag for all (-models) in future
                optd['models'] = optd['cluster_dir']
                optd['models'] = ample_util.extract_and_validate_models(optd)

            # Check we have some models to work with
            if not (optd['single_model_mode'] or optd['models']):
                ample_util.save_amoptd(optd)
                msg = "ERROR! Cannot find any pdb files in: {0}".format(
                    optd['models_dir'])
                exit_util.exit_error(msg)
            optd['ensemble_ok'] = os.path.join(optd['work_dir'], 'ensemble.ok')
            if optd['submit_cluster']:
                # Pickle dictionary so it can be opened by the job to get the parameters
                ample_util.save_amoptd(optd)
                script = ensembler.cluster_script(optd)
                ensembler_timeout = ensembler.get_ensembler_timeout(optd)
                workers_util.run_scripts(
                    job_scripts=[script],
                    monitor=monitor,
                    nproc=optd['nproc'],
                    job_time=ensembler_timeout,
                    job_name='ensemble',
                    submit_cluster=optd['submit_cluster'],
                    submit_qtype=optd['submit_qtype'],
                    submit_queue=optd['submit_queue'],
                    submit_pe_lsf=optd['submit_pe_lsf'],
                    submit_pe_sge=optd['submit_pe_sge'],
                    submit_array=optd['submit_array'],
                    submit_max_array=optd['submit_max_array'])
                # queue finished so unpickle results
                optd.update(ample_util.read_amoptd(optd['results_path']))
            else:
                try:
                    ensembler.create_ensembles(optd)
                except Exception as e:
                    msg = "Error creating ensembles: {0}".format(e)
                    exit_util.exit_error(msg, sys.exc_info()[2])

            # Check we have something to work with
            if not os.path.isfile(optd['ensemble_ok']) or not optd.get('ensembles'):
                msg = "Problem generating ensembles!"
                exit_util.exit_error(msg)

            if not (optd['homologs'] or optd['single_model_mode']):
                ensemble_summary = ensembler.ensemble_summary(
                    optd['ensembles_data'])
                logger.info(ensemble_summary)

        # Save the results
        ample_util.save_amoptd(optd)

        # Bail here if we didn't create anything
        if not len(optd['ensembles']):
            msg = "### AMPLE FAILED TO GENERATE ANY ENSEMBLES! ###\nExiting..."
            exit_util.exit_error(msg)

        # Update results view
        if self.ample_output:
            self.ample_output.display_results(optd)
        return
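
The submit_cluster branch above works by pickling optd so the queued job can reload its parameters, then unpickling the updated dictionary once the queue finishes. A minimal sketch of that round-trip, assuming save_amoptd/read_amoptd are essentially pickle wrappers (as the comments above suggest) and using a hypothetical file name:

import pickle

optd = {'nproc': 4, 'work_dir': '/tmp/ample_work'}
with open('resultsd.pkl', 'wb') as fh:
    pickle.dump(optd, fh)         # what save_amoptd does, in essence
with open('resultsd.pkl', 'rb') as fh:
    optd.update(pickle.load(fh))  # what read_amoptd does, in essence
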
Example n. 32
0
    def ensembling(self, optd):
        if optd['import_ensembles']:
            ensembler.import_ensembles(optd)
        elif optd['ideal_helices']:
            ample_util.ideal_helices(optd)
            logger.info("*** Using ideal helices to solve structure ***")
        else:
            # Check we have some models to work with
            if not (optd['single_model_mode'] or optd['processed_models']):
                ample_util.save_amoptd(optd)
                msg = "ERROR! Cannot find any pdb files in: {0}".format(optd['models_dir'])
                exit_util.exit_error(msg)
            optd['ensemble_ok'] = os.path.join(optd['work_dir'], 'ensemble.ok')
            if optd['submit_cluster']:
                # Pickle dictionary so it can be opened by the job to get the parameters
                ample_util.save_amoptd(optd)
                script = ensembler.cluster_script(optd)
                ensembler_timeout = ensembler.get_ensembler_timeout(optd)
                workers_util.run_scripts(
                    job_scripts=[script],
                    monitor=monitor,
                    nproc=optd['nproc'],
                    job_time=ensembler_timeout,
                    job_name='ensemble',
                    submit_cluster=optd['submit_cluster'],
                    submit_qtype=optd['submit_qtype'],
                    submit_queue=optd['submit_queue'],
                    submit_pe_lsf=optd['submit_pe_lsf'],
                    submit_pe_sge=optd['submit_pe_sge'],
                    submit_array=optd['submit_array'],
                    submit_max_array=optd['submit_max_array'])
                # queue finished so unpickle results
                optd.update(ample_util.read_amoptd(optd['results_path']))
            else:
                try:
                    ensembler.create_ensembles(optd)
                except Exception as e:
                    msg = "Error creating ensembles: {0}".format(e)
                    exit_util.exit_error(msg, sys.exc_info()[2])

            # Check we have something to work with
            if not os.path.isfile(optd['ensemble_ok']) or not optd.get('ensembles'):
                msg = "Problem generating ensembles!"
                exit_util.exit_error(msg)

            if not (optd['homologs'] or optd['single_model_mode']):
                ensemble_summary = ensembler.ensemble_summary(optd['ensembles_data'])
                logger.info(ensemble_summary)

        # Save the results
        ample_util.save_amoptd(optd)

        # Bail here if we didn't create anything
        if not len(optd['ensembles']):
            msg = "### AMPLE FAILED TO GENERATE ANY ENSEMBLES! ###\nExiting..."
            exit_util.exit_error(msg)

        # Update results view
        if self.ample_output:
            self.ample_output.display_results(optd)
        return
Example n. 33
0
    def modelling(self, optd, rosetta_modeller=None):
        if not (optd['import_models'] or optd['make_frags']
                or optd['make_models'] or optd['nmr_remodel']):
            return
        # Set the directory where the final models will end up
        optd['models_dir'] = os.path.join(optd['work_dir'], 'models')
        if not os.path.isdir(optd['models_dir']):
            os.mkdir(optd['models_dir'])
        if not rosetta_modeller:
            rosetta_modeller = options_processor.process_rosetta_options(optd)
        # Make Rosetta fragments
        if optd['make_frags']:
            rosetta_modeller.generate_fragments(optd)
            optd['frags_3mers'] = rosetta_modeller.frags_3mers
            optd['frags_9mers'] = rosetta_modeller.frags_9mers
            optd['psipred_ss2'] = rosetta_modeller.psipred_ss2

        if optd["use_contacts"] and not optd['restraints_file']:
            con_util = contact_util.ContactUtil(
                optd['fasta'],
                'fasta',
                contact_file=optd['contact_file'],
                contact_format=optd['contact_format'],
                bbcontacts_file=optd['bbcontacts_file'],
                bbcontacts_format=optd["bbcontacts_format"],
                cutoff_factor=optd['restraints_factor'],
                distance_to_neighbor=optd['distance_to_neighbour'])

            optd["contacts_dir"] = os.path.join(optd["work_dir"], "contacts")
            if not os.path.isdir(optd["contacts_dir"]):
                os.mkdir(optd["contacts_dir"])
            if con_util.require_contact_prediction:
                if con_util.found_ccmpred_contact_prediction_deps:
                    con_util.predict_contacts_from_sequence(
                        wdir=optd["contacts_dir"])
                    optd["contact_file"] = con_util.contact_file
                    optd["contact_format"] = con_util.contact_format

            if con_util.do_contact_analysis:
                plot_file = os.path.join(optd['contacts_dir'],
                                         optd['name'] + ".cm.png")
                if optd['native_pdb'] and optd['native_pdb_std']:
                    structure_file = optd['native_pdb_std']
                elif optd["native_pdb"]:
                    structure_file = optd['native_pdb']
                else:
                    structure_file = None
                optd['contact_map'], optd['contact_ppv'] = con_util.summarize(
                    plot_file, structure_file, 'pdb', optd['native_cutoff'])

                restraints_file = os.path.join(optd['contacts_dir'],
                                               optd['name'] + ".cst")
                optd['restraints_file'] = con_util.write_restraints(
                    restraints_file, optd['restraints_format'],
                    optd['energy_function'])
            else:
                con_util = None
        else:
            con_util = None

        if optd['make_models'] and optd['restraints_file']:
            rosetta_modeller.restraints_file = optd['restraints_file']

        if optd['make_models']:
            logger.info('----- Making Rosetta models -----')
            if optd['nmr_remodel']:
                try:
                    optd['models'] = rosetta_modeller.nmr_remodel(
                        models=optd['models'],
                        ntimes=optd['nmr_process'],
                        alignment_file=optd['alignment_file'],
                        remodel_fasta=optd['nmr_remodel_fasta'],
                        monitor=monitor)
                except Exception as e:
                    msg = "Error remodelling NMR ensemble: {0}".format(e)
                    exit_util.exit_error(msg, sys.exc_info()[2])
            else:
                logger.info('making %s models...', optd['nmodels'])
                try:
                    optd['models'] = rosetta_modeller.ab_initio_model(
                        monitor=monitor)
                except Exception as e:
                    msg = "Error running ROSETTA to create models: {0}".format(
                        e)
                    exit_util.exit_error(msg, sys.exc_info()[2])
                if not pdb_edit.check_pdb_directory(optd['models_dir'],
                                                    sequence=optd['sequence']):
                    msg = "Problem with rosetta pdb files - please check the log for more information"
                    exit_util.exit_error(msg)
                logger.info('Modelling complete - models stored in: %s\n',
                            optd['models_dir'])

        elif optd['import_models']:
            logger.info('Importing models from directory: %s\n',
                        optd['models_dir'])
            if optd['homologs']:
                optd['models'] = ample_util.extract_and_validate_models(
                    optd, sequence=None, single=True, allsame=False)
            else:
                optd['models'] = ample_util.extract_and_validate_models(optd)
                # Need to check if Quark and handle things accordingly
                if optd['quark_models']:
                    # We always add sidechains to QUARK models if SCWRL is installed
                    if ample_util.is_exe(optd['scwrl_exe']):
                        optd['use_scwrl'] = True
                    else:
                        # No SCWRL so don't do owt with the side chains
                        logger.info(
                            'Using QUARK models but SCWRL is not installed '
                            'so only using %s sidechains', UNMODIFIED)
                        optd['side_chain_treatments'] = [UNMODIFIED]

        # Sub-select the decoys using contact information
        if con_util and optd['subselect_mode'] and not (optd['nmr_model_in'] or
                                                        optd['nmr_remodel']):
            logger.info('Subselecting models from directory using '
                        'provided contact information')
            subselect_data = con_util.subselect_decoys(
                optd['models'], 'pdb', mode=optd['subselect_mode'], **optd)
            optd['models'] = list(zip(*subselect_data))[0]
            optd['subselect_data'] = dict(subselect_data)

        ample_util.save_amoptd(optd)
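
One portability point in the subselection step above: on Python 3, zip returns an iterator, so the result must be materialised before indexing. A small illustration with made-up decoy scores:

subselect_data = [('decoy_1.pdb', 0.92), ('decoy_2.pdb', 0.85)]
models, scores = zip(*subselect_data)  # transpose (model, score) pairs
assert tuple(models) == ('decoy_1.pdb', 'decoy_2.pdb')
assert dict(subselect_data)['decoy_1.pdb'] == 0.92
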
Example n. 34
0
    def ensembling(self, optd):
        if optd['import_ensembles']:
            ensembler.import_ensembles(optd)
        elif optd['ideal_helices']:
            ample_util.ideal_helices(optd)
            logger.info(
                "*** Attempting to solve the structure using ideal helices ***"
            )
            logger.warning(
                'If ideal helices do not solve the structure, you may want to use -helical_ensembles in '
                'place of -ideal_helices. AMPLE will then use a new set of helical ensembles which has been '
                'very successful on solving challenging cases!')
        elif optd['helical_ensembles']:
            ample_util.ideal_helices(optd)
            logger.info(
                "*** Attempting to solve the structure using %s set of helical ensembles ***"
                % optd['helical_ensembles_set'])
        else:
            # Check we have some models to work with
            if not (optd['single_model_mode'] or optd['processed_models']):
                ample_util.save_amoptd(optd)
                msg = "ERROR! Cannot find any pdb files in: {0}".format(
                    optd['models_dir'])
                exit_util.exit_error(msg)
            optd['ensemble_ok'] = os.path.join(optd['work_dir'], 'ensemble.ok')
            if optd['submit_qtype'] != 'local':
                # Pickle dictionary so it can be opened by the job to get the parameters
                ample_util.save_amoptd(optd)
                script = ensembler.cluster_script(optd)
                ensembler_timeout = ensembler.get_ensembler_timeout(optd)
                with TaskFactory(
                        optd['submit_qtype'],
                        script,
                        cwd=optd['work_dir'],
                        environment=optd['submit_pe'],
                        run_time=ensembler_timeout,
                        name='ensemble',
                        nprocesses=optd['nproc'],
                        max_array_size=optd['submit_max_array'],
                        queue=optd['submit_queue'],
                        shell="/bin/bash",
                ) as task:
                    task.run()
                    task.wait(interval=5, monitor_f=monitor)
                # queue finished so unpickle results
                optd.update(ample_util.read_amoptd(optd['results_path']))
            else:
                try:
                    ensembler.create_ensembles(optd)
                except Exception as e:
                    msg = "Error creating ensembles: {0}".format(e)
                    exit_util.exit_error(msg, sys.exc_info()[2])

            # Check we have something to work with
            if not os.path.isfile(optd['ensemble_ok']) or not optd.get('ensembles'):
                msg = "Problem generating ensembles!"
                exit_util.exit_error(msg)

            if not (optd['homologs'] or optd['single_model_mode']):
                ensemble_summary = ensembler.ensemble_summary(
                    optd['ensembles_data'])
                logger.info(ensemble_summary)

        # Save the results
        ample_util.save_amoptd(optd)

        # Bail here if we didn't create anything
        if not len(optd['ensembles']):
            msg = "### AMPLE FAILED TO GENERATE ANY ENSEMBLES! ###\nExiting..."
            exit_util.exit_error(msg)

        # Update results view
        if self.ample_output:
            self.ample_output.display_results(optd)
        return
Example n. 35
0
    def setup(self, optd):
        """We take and return an ample dictionary as an argument.

        This is required because options_processor.process_restart_options Changes what
        optd points at, and this means that if we just use the reference, we end up
        pointing at the old, obsolete dictionary

        """
        optd = options_processor.restart_amoptd(optd)

        # Make a work directory - this way all output goes into this directory
        if optd['work_dir'] and not optd['restart_pkl']:
            logger.info('Making a named work directory: %s', optd['work_dir'])
            try:
                os.mkdir(optd['work_dir'])
            except Exception as e:
                msg = "Cannot create work_dir {0}: {1}".format(
                    optd['work_dir'], e)
                exit_util.exit_error(msg, sys.exc_info()[2])

        if not optd['work_dir']:
            if not os.path.exists(optd['run_dir']):
                msg = 'Cannot find run directory: {0}'.format(optd['run_dir'])
                exit_util.exit_error(msg, sys.exc_info()[2])

            if bool(optd['rvapi_document']):
                # With JSCOFE we run in the run directory
                optd['work_dir'] = optd['run_dir']
            else:
                logger.info('Making a run directory: '
                            'checking for previous runs...')
                optd['work_dir'] = ample_util.make_workdir(
                    optd['run_dir'], ccp4i2=bool(optd['ccp4i2_xml']))

        os.chdir(optd['work_dir'])

        ample_log = os.path.join(optd['work_dir'], 'AMPLE.log')
        debug_log = os.path.join(optd['work_dir'], 'debug.log')
        optd['ample_log'] = ample_log

        logging_util.setup_file_logging(ample_log, level=logging.INFO)
        logging_util.setup_file_logging(debug_log, level=logging.DEBUG)

        optd['ccp4_version'] = ample_util.CCP4.version.version

        logger.info(reference_manager.header)
        logger.info("AMPLE version: %s", str(version.__version__))
        logger.info("Running with CCP4 version: %s from directory: %s",
                    ample_util.CCP4.version, ample_util.CCP4.root)
        logger.info("Running on host: %s", platform.node())
        logger.info("Running on platform: %s", platform.platform())
        logger.info("Job started at: %s",
                    time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime()))
        logger.info("Invoked with command-line:\n%s\n", " ".join(sys.argv))
        logger.info("Running in directory: %s\n", optd['work_dir'])

        if pyrvapi_results.pyrvapi:
            self.ample_output = pyrvapi_results.AmpleOutput(optd)
            self.ample_output.display_results(optd)

        options_processor.check_mandatory_options(optd)

        optd = options_processor.process_restart_options(optd)
        if not optd['restart_pkl']:
            options_processor.process_options(optd)

        if optd['dry_run']:
            logger.info('Dry run finished checking options - cleaning up...')
            os.chdir(optd['run_dir'])
            shutil.rmtree(optd['work_dir'])
            sys.exit(0)

        logger.info('All needed programs are found, continuing...')
        return optd
Example n. 36
0
    def setup(self, optd):
        """We take and return an ample dictionary as an argument. This is required because options_processor.process_restart_options
        Changes what optd points at, and this means that if we just use the reference, we end up pointing at the old, obselete dictionary"""

        # Update the ample dictionary in case we are restarting
        optd = options_processor.restart_amoptd(optd)

        # Make a work directory - this way all output goes into this directory
        if optd['work_dir'] and not optd['restart_pkl']:
            logger.info('Making a named work directory: {0}'.format(
                optd['work_dir']))
            try:
                os.mkdir(optd['work_dir'])
            except Exception as e:
                msg = "Cannot create work_dir {0}: {1}".format(
                    optd['work_dir'], e)
                exit_util.exit_error(msg, sys.exc_info()[2])

        if not optd['work_dir']:
            if not os.path.exists(optd['run_dir']):
                msg = 'Cannot find run directory: {0}'.format(optd['run_dir'])
                exit_util.exit_error(msg, sys.exc_info()[2])

            if bool(optd['rvapi_document']):
                # With JSCOFE we run in the run directory
                optd['work_dir'] = optd['run_dir']
            else:
                logger.info(
                    'Making a run directory: checking for previous runs...')
                optd['work_dir'] = ample_util.make_workdir(
                    optd['run_dir'], ccp4i2=bool(optd['ccp4i2_xml']))
        # Go to the work directory
        os.chdir(optd['work_dir'])

        # Set up logging
        ample_log = os.path.join(optd['work_dir'], 'AMPLE.log')
        debug_log = os.path.join(optd['work_dir'], 'debug.log')
        optd['ample_log'] = ample_log

        # Set up ample output file and debug log file.
        logging_util.setup_file_logging(ample_log, level=logging.INFO)
        logging_util.setup_file_logging(debug_log, level=logging.DEBUG)

        # Make sure the CCP4 environment is set up properly
        ccp4_home = self.setup_ccp4(optd)
        ccp4_version = ".".join([str(x) for x in optd['ccp4_version']])

        # Print out Version and invocation
        logger.info(ample_util.header)
        logger.info("AMPLE version: {0}".format(version.__version__))
        logger.info(
            "Running with CCP4 version: {0} from directory: {1}".format(
                ccp4_version, ccp4_home))
        logger.info("Running on host: {0}".format(platform.node()))
        logger.info("Running on platform: {0}".format(platform.platform()))
        logger.info("Job started at: {0}".format(
            time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime())))
        logger.info("Invoked with command-line:\n{0}\n".format(" ".join(
            sys.argv)))
        logger.info("Running in directory: {0}\n".format(optd['work_dir']))

        # Display pyrvapi results
        if pyrvapi_results.pyrvapi:
            self.ample_output = pyrvapi_results.AmpleOutput(optd)
            self.ample_output.display_results(optd)

        # Check mandatory/exclusive options
        options_processor.check_mandatory_options(optd)

        # Check if we are restarting from an existing pkl file - we don't process the options from this
        # run if so
        optd = options_processor.process_restart_options(optd)
        if not optd['restart_pkl']:
            # Only process the remaining options if we aren't in restart mode
            options_processor.process_options(optd)

        # Bail and clean up if we were only checking the options
        if optd['dry_run']:
            logger.info('Dry run finished checking options - cleaning up...')
            os.chdir(optd['run_dir'])
            shutil.rmtree(optd['work_dir'])
            sys.exit(0)

        logger.info('All needed programs are found, continuing...')

        return optd
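
Taken together, setup, modelling and ensembling run in a fixed order. A condensed driver sketch showing how the methods above chain (the class name and exact call order are assumptions based on these excerpts, not AMPLE's actual entry point):

class Ample(object):
    def run(self, optd):
        optd = self.setup(optd)  # work dir, logging, option checks
        rosetta_modeller = options_processor.process_rosetta_options(optd)
        self.modelling(optd, rosetta_modeller)  # fragments / models / imports
        self.ensembling(optd)  # truncate and cluster models into ensembles
        return optd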