Esempio n. 1
0
    def test_run_fails_cause_glide_fails(self):
        """Verify run() reports an error when the glide command fails.

        The glide parameter is set to 'false' and the test expects a
        'Non zero exit code: 1' error, an error file, and the
        false.stderr / false.stdout files in the glide directory.
        """
        work_dir = tempfile.mkdtemp()
        try:
            theargs = D3RParameters()
            theargs.glide = 'false'

            # mark the upstream proteinligprep task as complete
            prep_task = ProteinLigPrepTask(work_dir, theargs)
            prep_task.create_dir()
            complete_marker = os.path.join(prep_task.get_dir(),
                                           D3RTask.COMPLETE_FILE)
            with open(complete_marker, 'a'):
                pass

            task = GlideTask(work_dir, theargs)
            task.run()

            expected_error = (
                'Non zero exit code: 1 received. Standard out: ' +
                ' Standard error: ')
            self.assertEqual(task.get_error(), expected_error)

            # the error token and both captured output files must exist
            for fname in (D3RTask.ERROR_FILE, 'false.stderr',
                          'false.stdout'):
                self.assertTrue(
                    os.path.isfile(os.path.join(task.get_dir(), fname)))
        finally:
            shutil.rmtree(work_dir)
Esempio n. 2
0
    def test_run_succeeds(self):
        """Verify run() completes cleanly when the glide command succeeds.

        The glide parameter is set to 'true'; the test expects no error,
        no error file, a complete file, and the true.stderr /
        true.stdout files in the glide directory.
        """
        work_dir = tempfile.mkdtemp()
        try:
            theargs = D3RParameters()
            theargs.glide = 'true'

            # mark the upstream proteinligprep task as complete
            prep_task = ProteinLigPrepTask(work_dir, theargs)
            prep_task.create_dir()
            complete_marker = os.path.join(prep_task.get_dir(),
                                           D3RTask.COMPLETE_FILE)
            with open(complete_marker, 'a'):
                pass

            task = GlideTask(work_dir, theargs)
            task.run()
            self.assertIsNone(task.get_error())

            # no error token should have been written
            self.assertFalse(os.path.isfile(
                os.path.join(task.get_dir(), D3RTask.ERROR_FILE)))

            # the complete token and both captured output files must exist
            for fname in (D3RTask.COMPLETE_FILE, 'true.stderr',
                          'true.stdout'):
                self.assertTrue(
                    os.path.isfile(os.path.join(task.get_dir(), fname)))
        finally:
            shutil.rmtree(work_dir)
Esempio n. 3
0
    def __init__(self, path, args):
        """Constructor

        Names the task 'glide', places it one stage after
        ProteinLigPrepTask and sets its status to
        D3RTask.UNKNOWN_STATUS.

        :param path: forwarded to the parent constructor and to
                     ProteinLigPrepTask
        :param args: forwarded to the parent constructor and to
                     ProteinLigPrepTask
        """
        super(GlideTask, self).__init__(path, args)
        self.set_name('glide')

        # stage number is derived from the upstream task rather than
        # hard coded
        upstream_stage = ProteinLigPrepTask(path, args).get_stage()
        self.set_stage(upstream_stage + 1)
        self.set_status(D3RTask.UNKNOWN_STATUS)
Esempio n. 4
0
 def test_run_fails_cause_can_run_is_false(self):
     """Verify ProteinLigPrepTask.run() stops when can_run is false.

     No challengedata task is created, so the expected error is
     'challengedata task has notfound status'.
     """
     work_dir = tempfile.mkdtemp()
     try:
         # return immediately cause can_run is false
         task = ProteinLigPrepTask(work_dir, D3RParameters())
         task.run()
         expected_error = 'challengedata task has notfound status'
         self.assertEqual(task.get_error(), expected_error)
     finally:
         shutil.rmtree(work_dir)
Esempio n. 5
0
    def test_run_fails_cause_proteinligprep_is_not_found(self):
        """Verify run() reports an error for a missing proteinligprep script.

        proteinligprep is set to '/bin/doesnotexist'; the test expects a
        'Caught Exception trying to run ...' error containing the full
        command line, plus an error file in the task directory.
        """
        work_dir = tempfile.mkdtemp()
        try:
            theargs = D3RParameters()
            theargs.proteinligprep = '/bin/doesnotexist'
            theargs.pdbdb = '/foo'

            # mark the upstream challengedata task as complete
            chall_task = ChallengeDataTask(work_dir, theargs)
            chall_task.create_dir()
            candidate_dir = os.path.join(
                chall_task.get_dir(),
                chall_task.get_celpp_challenge_data_dir_name())
            with open(os.path.join(chall_task.get_dir(),
                                   D3RTask.COMPLETE_FILE), 'a'):
                pass

            task = ProteinLigPrepTask(work_dir, theargs)
            task.run()

            expected_error = ('Caught Exception trying to run ' +
                              '/bin/doesnotexist --candidatedir ' +
                              candidate_dir + ' --pdbdb ' +
                              '/foo --outdir ' + task.get_dir() +
                              ' : [Errno 2] No such file or directory')
            self.assertEqual(task.get_error(), expected_error)

            # test files get created
            self.assertTrue(os.path.isfile(
                os.path.join(task.get_dir(), D3RTask.ERROR_FILE)))
        finally:
            shutil.rmtree(work_dir)
Esempio n. 6
0
    def run(self):
        """Runs the glide docking program on proteinligprep output

           The parent class run() is invoked first and this method
           returns immediately if the _can_run flag is False.  If the
           glide argument is not set the error 'glide not set' is
           recorded.  Otherwise the program set in the glide argument
           is invoked with --structuredir set to the ProteinLigPrepTask
           directory and --outdir set to the directory of this task.
           In both of those last two cases self.end() is invoked
           before returning
           """
        super(GlideTask, self).run()

        if self._can_run is False:
            logger.debug(self.get_dir_name() +
                         ' cannot run cause _can_run flag is False')
            return

        try:
            glide_cmd = self.get_args().glide
            logger.debug('glide set to ' + glide_cmd)
        except AttributeError:
            # args object has no glide attribute at all
            self.set_error('glide not set')
            self.end()
            return

        #
        # glide.py --structuredir <path to stage.3.proteinligprep> \
        # --outdir <path to stage.4.glide>
        #
        structure_dir = ProteinLigPrepTask(self._path, self._args).get_dir()
        cmd_to_run = (glide_cmd + ' --structuredir ' + structure_dir +
                      ' --outdir ' + self.get_dir())

        # base name of the program is passed along with the full command
        self.run_external_command(os.path.basename(glide_cmd),
                                  cmd_to_run, True)

        # assess the result
        self.end()
Esempio n. 7
0
 def test_run_fails_cause_glide_not_set(self):
     """Verify run() reports 'glide not set' when no glide argument exists.

     The glide directory and its error file are still expected to be
     created.
     """
     work_dir = tempfile.mkdtemp()
     try:
         # params deliberately lack a glide attribute
         theargs = D3RParameters()

         # mark the upstream proteinligprep task as complete
         prep_task = ProteinLigPrepTask(work_dir, theargs)
         prep_task.create_dir()
         with open(os.path.join(prep_task.get_dir(),
                                D3RTask.COMPLETE_FILE), 'a'):
             pass

         task = GlideTask(work_dir, theargs)
         task.run()
         self.assertEqual(task.get_error(), 'glide not set')

         # test files get created
         self.assertTrue(os.path.isdir(task.get_dir()))
         self.assertTrue(os.path.isfile(
             os.path.join(task.get_dir(), D3RTask.ERROR_FILE)))
     finally:
         shutil.rmtree(work_dir)
Esempio n. 8
0
    def test_run_fails_cause_glide_is_not_found(self):
        """Verify run() reports an error when the glide program is missing.

        glide is set to '/bin/doesnotexist'; the test expects a
        'Caught Exception trying to run ...' error containing the full
        command line, plus an error file in the glide directory.
        """
        work_dir = tempfile.mkdtemp()
        try:
            theargs = D3RParameters()
            theargs.glide = '/bin/doesnotexist'

            # mark the upstream proteinligprep task as complete
            prep_task = ProteinLigPrepTask(work_dir, theargs)
            prep_task.create_dir()
            with open(os.path.join(prep_task.get_dir(),
                                   D3RTask.COMPLETE_FILE), 'a'):
                pass

            task = GlideTask(work_dir, theargs)
            task.run()

            expected_error = ('Caught Exception trying to run ' +
                              '/bin/doesnotexist --structuredir ' +
                              prep_task.get_dir() + ' --outdir ' +
                              task.get_dir() +
                              ' : [Errno 2] No such file or directory')
            self.assertEqual(task.get_error(), expected_error)

            # test files get created
            self.assertTrue(os.path.isfile(
                os.path.join(task.get_dir(), D3RTask.ERROR_FILE)))
        finally:
            shutil.rmtree(work_dir)
Esempio n. 9
0
    def test_can_run(self):
        """Walk GlideTask.can_run() through upstream and own task states.

        Scenarios are applied in sequence to one temp directory:
        proteinligprep missing, started, errored and complete, then the
        glide directory already existing and finally glide complete.
        """
        work_dir = tempfile.mkdtemp()

        def touch(path):
            # create an empty token file, leaving an existing one untouched
            with open(path, 'a'):
                pass

        try:
            theargs = D3RParameters()

            # no proteinligprep task found so it cannot run
            task = GlideTask(work_dir, theargs)
            self.assertFalse(task.can_run())
            self.assertEqual(task.get_error(),
                             'proteinligprep task has notfound status')

            # proteinligprep filter running
            prep_task = ProteinLigPrepTask(work_dir, theargs)
            prep_task.create_dir()
            touch(os.path.join(prep_task.get_dir(), D3RTask.START_FILE))
            task = GlideTask(work_dir, theargs)
            self.assertFalse(task.can_run())
            self.assertEqual(task.get_error(),
                             'proteinligprep task has start status')

            # proteinligprep failed
            prep_error_file = os.path.join(prep_task.get_dir(),
                                           D3RTask.ERROR_FILE)
            touch(prep_error_file)
            task = GlideTask(work_dir, theargs)
            self.assertFalse(task.can_run())
            self.assertEqual(task.get_error(),
                             'proteinligprep task has error status')

            # proteinligprep success
            os.remove(prep_error_file)
            touch(os.path.join(prep_task.get_dir(), D3RTask.COMPLETE_FILE))
            task = GlideTask(work_dir, theargs)
            self.assertTrue(task.can_run())
            self.assertIsNone(task.get_error())

            # glide task exists already
            task = GlideTask(work_dir, theargs)
            task.create_dir()
            self.assertFalse(task.can_run())
            expected_error = (task.get_dir_name() +
                              ' already exists and status is unknown')
            self.assertEqual(task.get_error(), expected_error)

            # glide already complete
            task = GlideTask(work_dir, theargs)
            touch(os.path.join(task.get_dir(), D3RTask.COMPLETE_FILE))
            self.assertFalse(task.can_run())
            self.assertIsNone(task.get_error())

        finally:
            shutil.rmtree(work_dir)
Esempio n. 10
0
    def can_run(self):
        """Determines if task can actually run

           This method first verifies the `ProteinLigPrepTask`
           has `D3RTask.COMPLETE_STATUS` for status.  The method then
           verifies this `GlideTask` does not already exist on the
           filesystem.  self.set_error() is called with information
           about the issue when the proteinligprep task is not
           complete or when this task already exists in a state other
           than complete.  If this task is already complete, False is
           returned with no error set.

           As a side effect self._error is cleared and self._can_run is
           set to the value returned.
           :return: True if can run otherwise False
        """
        self._can_run = False
        self._error = None

        # check the upstream proteinligprep task finished successfully
        proteinligprep = ProteinLigPrepTask(self._path, self._args)
        proteinligprep.update_status_from_filesystem()
        prep_status = proteinligprep.get_status()
        if prep_status != D3RTask.COMPLETE_STATUS:
            logger.info('Cannot run %s task because %s task has a status '
                        'of %s', self.get_name(),
                        proteinligprep.get_name(), prep_status)
            self.set_error(proteinligprep.get_name() + ' task has ' +
                           prep_status + ' status')
            return False

        # check this task is not complete and does not exist
        self.update_status_from_filesystem()
        if self.get_status() == D3RTask.COMPLETE_STATUS:
            # already done is not an error, there is just nothing to do
            logger.debug("No work needed for " + self.get_name() +
                         " task")
            return False

        if self.get_status() != D3RTask.NOTFOUND_STATUS:
            logger.warning(self.get_name() + " task was already " +
                           "attempted, but there was a problem")
            self.set_error(self.get_dir_name() + ' already exists and ' +
                           'status is ' + self.get_status())
            return False

        self._can_run = True
        return True
Esempio n. 11
0
 def test_run_fails_cause_pdbdb_not_set(self):
     """Verify ProteinLigPrepTask.run() reports 'pdbdb not set'.

     proteinligprep is set but pdbdb is not; the task directory and
     its error file are still expected to be created.
     """
     work_dir = tempfile.mkdtemp()
     try:
         # params deliberately lack a pdbdb attribute
         theargs = D3RParameters()
         theargs.proteinligprep = 'false'

         # mark the upstream challengedata task as complete
         chall_task = ChallengeDataTask(work_dir, theargs)
         chall_task.create_dir()
         with open(os.path.join(chall_task.get_dir(),
                                D3RTask.COMPLETE_FILE), 'a'):
             pass

         task = ProteinLigPrepTask(work_dir, theargs)
         task.run()
         self.assertEqual(task.get_error(), 'pdbdb not set')

         # test files get created
         self.assertTrue(os.path.isdir(task.get_dir()))
         self.assertTrue(os.path.isfile(
             os.path.join(task.get_dir(), D3RTask.ERROR_FILE)))
     finally:
         shutil.rmtree(work_dir)
Esempio n. 12
0
class EvaluationTaskFactory(object):
    """Factory class to generate EvaluationTask objects

       This factory examines a celpp week directory for
       all docking tasks.  The code then generates
       EvaluationTask objects for all eligible docking tasks
    """
    # Throwaway ProteinLigPrepTask used only to look up its stage number;
    # docking task directories are expected one stage after it.
    prep = ProteinLigPrepTask('/foo', D3RParameters())
    DOCKSTAGE = prep.get_stage() + 1
    DOCKSTAGE_PREFIX = (D3RTask.STAGE_DIRNAME_PREFIX + '.' + str(DOCKSTAGE) +
                        '.')
    SCORING_SUFFIX = 'evaluation'
    WEB_DATA_SUFFIX = 'webdata'

    def __init__(self, path, theargs):
        """Constructor
        :param path: directory to examine for docking tasks
        :param theargs: arguments handed to every task created
        """
        self.set_path(path)
        self.set_args(theargs)

    def set_args(self, theargs):
        """ Sets args
        :param theargs: arguments to set
        """
        self._args = theargs

    def get_args(self):
        """Gets args passed into constructor or via set_args()
        """
        return self._args

    def set_path(self, path):
        """Sets path used to look for docking tasks
        """
        self._path = path

    def get_path(self):
        """Gets path used to look for docking tasks
        """
        return self._path

    def _get_participant_database(self):
        """Creates `ParticipantDatabase` from the participant list csv
           file reported by a `DataImportTask` built with the path and
           args of this factory
        :returns: ParticipantDatabase
        """
        dimport = DataImportTask(self.get_path(), self.get_args())
        csvfile = dimport.get_participant_list_csv()
        pfac = ParticipantDatabaseFromCSVFactory(csvfile)
        return pfac.get_participant_database()

    def _update_priorities_of_tasks(self, etasks, participant_db):
        """Updates priorities for EvaluationTask objects in
           list of etask objects from values obtained in
           participant_db ParticipantDatabase.  Tasks without a guid
           or without a matching participant are left untouched
           :param etasks: list of EvaluationTask objects, it is assumed to
                          never be None
           :param participant_db: ParticipantDatabase object, assumed never
                                  to be none
           :returns: same list of EvaluationTask objects with priority set
        """
        for task in etasks:
            guid = task.get_guid_for_task()
            if guid is None:
                continue
            logger.debug('Looking for participant with guid: ' + guid)
            p = participant_db.get_participant_by_guid(guid)
            if p is None:
                logger.debug('No participant found to match guid: ' + guid)
                continue
            task.set_priority(p.get_priority())
            logger.debug('Setting priority for ' + guid + ' to ' +
                         str(task.get_priority()))
        return etasks

    def _sort_tasks_by_participant_priority(self, etasks, participant_db):
        """Sorts `EvaluationTask` objects in `etasks` by priority
           set for participants in `participant_db` The sorting
           goes as follows. Participants with highest get_priority()
           value go first, identical priority ordering is arbitrary
           and any EvaluationTasks without priority are put at end
           of list in arbitrary order.  The list passed in is sorted
           in place.
        :param etasks: list of EvaluationTask objects, returned as is
                       if None
        :param participant_db: ParticipantDatabase object, if None
                               `etasks` is returned unsorted
        :returns: list of sorted EvaluationTask objects
        """
        if etasks is None:
            logger.debug('No EvaluationTasks to sort')
            return etasks

        if participant_db is None:
            logger.warning('Participant Database is None, cannot sort')
            return etasks

        # update priorities of etasks
        updatedtasks = self._update_priorities_of_tasks(etasks, participant_db)

        def _priority_key(task):
            # Python 3 refuses to order None against numbers, so the
            # leading flag keeps tasks lacking a priority out of that
            # comparison; with reverse=True they land at the end.
            priority = task.get_priority()
            return (priority is not None, priority)

        updatedtasks.sort(reverse=True, key=_priority_key)
        return updatedtasks

    def get_evaluation_tasks(self):
        """Generate EvaluationTasks

           This method examines the path directory
           set via the constructor or set_path() method
           for all directories starting with DOCKSTAGE_PREFIX
           excluding those that end with 'webdata'  A EvaluationTask
           object is created for each of these tasks and those where
           can_run() is True are returned in a list sorted by
           participant priority.
           :raises PathNotDirectoryError: if path is not a directory
           :return: list of EvaluationTask objects or empty list if none found
        """
        path = self.get_path()
        logger.debug('Examining ' + path + ' for docking tasks')
        if not os.path.isdir(path):
            raise PathNotDirectoryError(path + ' is not a directory')
        scoring_tasks = []

        path_list = os.listdir(path)

        participant_db = self._get_participant_database()
        efac = SmtpEmailerFactory(self.get_args())
        emailer = EvaluationEmailer(participant_db, efac.get_smtp_emailer())

        for entry in path_list:
            logger.debug('Checking if ' + entry + ' is a docking task')
            full_path = os.path.join(path, entry)
            if not os.path.isdir(full_path):
                continue
            if not entry.startswith(EvaluationTaskFactory.DOCKSTAGE_PREFIX):
                continue
            if entry.endswith(EvaluationTaskFactory.WEB_DATA_SUFFIX):
                logger.debug('Skipping ' + entry + ' due to suffix')
                continue

            # we have a valid docking path, the task name is whatever
            # follows the stage prefix in the directory name
            docktask = D3RTask(path, self.get_args())
            docktask.set_stage(EvaluationTaskFactory.DOCKSTAGE)
            docktask.set_name(
                entry[len(EvaluationTaskFactory.DOCKSTAGE_PREFIX):])
            stask = EvaluationTask(
                path,
                docktask.get_name() + '.' +
                EvaluationTaskFactory.SCORING_SUFFIX, docktask,
                self.get_args())

            if stask.can_run():
                logger.debug('Adding task ' + stask.get_name())
                stask.set_evaluation_emailer(emailer)
                scoring_tasks.append(stask)
                continue

            if stask.get_error() is None:
                logger.debug(stask.get_name() + ' cannot be' +
                             ' added, no error though')
            else:
                logger.debug(stask.get_name() + ' cannot be' +
                             ' added : ' + stask.get_error())

        return self._sort_tasks_by_participant_priority(
            scoring_tasks, participant_db)
Esempio n. 13
0
def main():
    """Command line entry point

       Builds the help description, parses the command line arguments
       via _parse_arguments(), configures logging and invokes
       run_stages().  If run_stages() raises an Exception it is logged
       and the program exits with code 2
    """
    # Task objects are created with an empty path; below they are only
    # used to fill directory names, stage numbers and file names into
    # the help text.
    # NOTE(review): p is not defined in this function, presumably a
    # module-level parameters object -- confirm
    blasttask = BlastNFilterTask('', p)
    dataimport = DataImportTask('', p)
    challenge = ChallengeDataTask('', p)
    glide = GlideTask('', p)
    makedb = MakeBlastDBTask('', p)
    prot = ProteinLigPrepTask('', p)
    vina = AutoDockVinaTask('', p)
    chimeraprep = ChimeraProteinLigPrepTask('', p)
    # Help text template, {placeholders} are filled by .format() below
    desc = """
              Version {version}

              Runs the 9 stages (makedb, import, blast, challengedata,
              proteinligprep, {chimeraprep}, extsubmission, glide, vina, &
              evaluation) of CELPP processing pipeline
              (http://www.drugdesigndata.org)

              CELPP processing pipeline relies on a set of directories
              with specific structure. The pipeline runs a set of stages
              Each stage has a numerical value and a name. The numerical
              value denotes order and the stage name identifies separate
              tasks to run in the stage.

              The filesystem structure of the stage is:

              stage.<stage number>.<task name>

              The stage(s) run are defined via the required --stage flag.

              To run multiple stages serially just pass a comma delimited
              list to the --stage flag. Example: --stage import,blast

              NOTE:  When running multiple stages serially the program will
                     not run subsequent stages if a task in a stage fails.
                     Also note order matters, ie putting blast,import will
                     cause celpprunner.py to run blast stage first.

              This program drops a pid lockfile
              (celpprunner.<stage>.lockpid) in celppdir to prevent duplicate
              invocation.

              When run, this program will examine the stage and see
              if work can be done.  If stage is complete or previous
              steps have not completed, the program will exit silently.
              If previous steps have failed or current stage already
              exists in an error or uncomplete state then program will
              report the error via email using addresses set in --email
              flag. Errors will also be reported via stderr/stdout.
              The program will also exit with nonzero exit code.

              This program utilizes simple token files to denote stage
              completion.  If within the stage directory there is a:

              '{complete}' file - then stage is done and no other
                                checking is done.

              'error' file - then stage failed.

              'start' file - then stage is running.

              Notification of stage start and end will be sent to
              addresses set via --email flag.

              Unless --customweekdir is set, this program will
              examine the 'celppdir' (last argument passed on
              commandline) to find the latest directory with this path:

              <year>/dataset.week.#

              The program will find the latest <year> and within
              that year the dataset.week.# with highest #.  The output
              directories created will be put within this directory.

              Setting --customweekdir will cause program to use 'celppdir'
              path.

              Setting the --createweekdir flag will instruct this
              program to create a new directory for the current
              celpp week/year before running any stage processing.

              NOTE: CELPP weeks start on Friday and end on Thursday
                    and week # follows ISO8601 rules so week numbers
                    at the end and start of the year are a bit
                    wonky.

              Breakdown of behavior of program is defined by
              value passed with --stage flag:

              If --stage '{createchallenge}'

              This is NOT a stage, but has the same effect as
              calling --stage makedb,import,blast,challengedata
              The four stages that need to run to generate the challenge
              data package.

              If --stage 'makedb'

              In this stage the file {pdb_seqres} is downloaded from
              an ftp site set by --pdbsequrl.
              This file is then gunzipped and NCBI makeblastdb
              (set by --makeblastdb) is run on it to create a blast
              database.  The files are stored in {makeblastdb_dirname}

              If --stage 'import'

              In this stage 4 files are downloaded from urls specified
              by --compinchi and --pdbfileurl flags on the commandline
              into {dataimport_dirname} directory.

              The tsv files are (--pdbfileurl flag sets url to
              download these files from):

              {nonpolymer_tsv}
              {sequence_tsv}
              {crystal_tsv}

              The Components ich file is (--compinchi flag sets base url to
              download this file from):

              {compinchi_ich}

              This stage will just wait and retry if any of the tsv files
              have NOT been updated since the start of the current
              celpp week as determined by a HEAD request. To bypass
              this delay add --skipimportwait flag.  --importsleep denotes
              the time to wait before re-examining the update time of the
              tsv files and --importretry sets number of times to retry
              before giving up.

              If --stage 'blast'

              Verifies {dataimport_dirname} exists and has '{complete}'
              file.  Also verifies {makeblastdb_dirname} exists and has
              '{complete}' file.  If both conditions are met then the
              'blast' stage is run which invokes script set by
              --blastnfilter flag and output stored in
              {blast_dirname}.
              Requires --pdbdb to be set to a directory with valid PDB
              database files.

              Note: --blastnfilter script is killed after time set with
              --blastnfiltertimeout flag.


              If --stage 'challengedata'

              Verifies {blast_dirname} exists and has '{complete}'
              file.  If complete, this stage runs which invokes program
              set in --genchallenge flag to create a challenge dataset
              file.  The --pdbdb flag must also be set when calling this
              stage. If --ftpconfig is set with {challengepath} field then
              this stage will also upload the challenge dataset tarfile
              to the ftp server with path set by {challengepath}.  The
              code will also upload a {latest_txt} file containing name
              of the tarfile to the same destination overwriting any
              {latest_txt} file that already exists.

              Example file for --ftpconfig:

              {host} some.ftp.com
              {user} bob
              {passn} mypass
              {path} /celpp
              {challengepath} /challenge
              {submissionpath} /submissions


              If --stage '{chimeraprep}'

              Verifies {challenge_dirname} exists and has '{complete}'
              file.  If complete, this stage runs which invokes program
              set in --chimeraprep flag to prepare pdb and inchi files
              storing output in {chimeraprep_dirname}.  --pdbdb flag
              must also be set when calling this stage.

              If --stage 'proteinligprep'

              Verifies {challenge_dirname} exists and has '{complete}'
              file.  If complete, this stage runs which invokes program
              set in --proteinligprep flag to prepare pdb and inchi files
              storing output in {proteinligprep_dirname}.  --pdbdb flag
              must also be set when calling this stage.

              If --stage 'extsubmission'

              Connects to server specified by --ftpconfig and downloads
              external docking submissions from {submissionpath} on remote
              server.

              Submissions should be named:

              celpp_weekXX_YYYY_dockedresults_ZZZZ.tar.gz as documented here:

              https://github.com/drugdata/d3r/wiki/Proposed-challenge-docked\
              -results-file-structure

              For each submission a directory named stage.X.ZZZZ.extsubmission
              will be created and uncompressed contents of package will be
              stored in that directory.  If data does not conform properly
              'error' file will be placed in directory denoting failure

              If --stage 'glide'

              Verifies {proteinligprep_dirname} exists and has a '{complete}'
              file within it.  If complete, this stage runs which invokes
              program set in --glide flag to perform docking via glide
              storing output in {glide_dirname}

              If --stage 'vina'

              Verifies {proteinligprep_dirname} exists and has a '{complete}'
              file within it.  If complete, this stage runs which invokes
              program set in --vina flag to perform docking via AutoDock Vina
              storing output in {vina_dirname}

              If --stage 'evaluation'

              Finds all stage.{dockstage}.<algo> directories with '{complete}'
              files in them which do not end in name '{webdata}' and runs
              script set via --evaluation parameter storing the result of
              the script into stage.{evalstage}.<algo>.evaluation. --pdbdb flag
              must also be set when calling this stage.


              """.format(makeblastdb_dirname=makedb.get_dir_name(),
                         dataimport_dirname=dataimport.get_dir_name(),
                         blast_dirname=blasttask.get_dir_name(),
                         challenge_dirname=challenge.get_dir_name(),
                         createchallenge=CREATE_CHALLENGE,
                         proteinligprep_dirname=prot.get_dir_name(),
                         glide_dirname=glide.get_dir_name(),
                         vina_dirname=vina.get_dir_name(),
                         dockstage=str(glide.get_stage()),
                         evalstage=str(glide.get_stage() + 1),
                         complete=blasttask.COMPLETE_FILE,
                         chimeraprep_dirname=chimeraprep.get_dir_name(),
                         chimeraprep=CHIMERA_PREP,
                         compinchi_ich=DataImportTask.COMPINCHI_ICH,
                         pdb_seqres=MakeBlastDBTask.PDB_SEQRES_TXT_GZ,
                         nonpolymer_tsv=DataImportTask.NONPOLYMER_TSV,
                         sequence_tsv=DataImportTask.SEQUENCE_TSV,
                         crystal_tsv=DataImportTask.CRYSTALPH_TSV,
                         webdata=EvaluationTaskFactory.WEB_DATA_SUFFIX,
                         latest_txt=ChallengeDataTask.LATEST_TXT,
                         host=FtpFileTransfer.HOST,
                         user=FtpFileTransfer.USER,
                         passn=FtpFileTransfer.PASS,
                         path=FtpFileTransfer.PATH,
                         challengepath=FtpFileTransfer.CHALLENGEPATH,
                         submissionpath=FtpFileTransfer.SUBMISSIONPATH,
                         version=d3r.__version__)

    # argv[0] is the program name, everything after it is parsed
    theargs = _parse_arguments(desc, sys.argv[1:])
    theargs.program = sys.argv[0]
    theargs.version = d3r.__version__

    util.setup_logging(theargs)

    try:
        run_stages(theargs)
    except Exception:
        # top level boundary: log with traceback and exit with code 2
        logger.exception("Error caught exception")
        sys.exit(2)
Esempio n. 14
0
def get_task_list_for_stage(theargs, stage_name):
    """Factory method that generates a list of tasks for given stage

       Using stage_name get the list of tasks that need to
       be run.
       :param theargs: parameters set via commandline along with
                       ``theargs.latest_weekly`` which should be set
                       to base directory where stages will be run
       :param stage_name:  Name of stage to run
       :returns: non-empty list of task objects to run for the stage
       :raises NotImplementedError: if stage_name is None or if no tasks
                                    were generated for stage_name
    """
    if stage_name is None:
        raise NotImplementedError('stage_name is None')

    task_list = []

    logger.debug('Getting task list for ' + stage_name)

    # NOTE: theargs.latest_weekly is deliberately only read inside a
    # matching branch so an unknown stage raises NotImplementedError
    # even when latest_weekly was never set on theargs.
    if stage_name == CREATE_CHALLENGE:
        task_list.append(MakeBlastDBTask(theargs.latest_weekly, theargs))
        task_list.append(DataImportTask(theargs.latest_weekly, theargs))
        task_list.append(BlastNFilterTask(theargs.latest_weekly, theargs))
        task_list.append(ChallengeDataTask(theargs.latest_weekly, theargs))

    if stage_name == 'makedb':
        task_list.append(MakeBlastDBTask(theargs.latest_weekly, theargs))

    if stage_name == 'import':
        task_list.append(DataImportTask(theargs.latest_weekly, theargs))

    if stage_name == 'blast':
        task_list.append(BlastNFilterTask(theargs.latest_weekly, theargs))

    if stage_name == 'challengedata':
        task_list.append(ChallengeDataTask(theargs.latest_weekly, theargs))

    if stage_name == 'proteinligprep':
        task_list.append(ProteinLigPrepTask(theargs.latest_weekly, theargs))

    if stage_name == 'glide':
        task_list.append(GlideTask(theargs.latest_weekly, theargs))

    if stage_name == 'vina':
        task_list.append(AutoDockVinaTask(theargs.latest_weekly, theargs))

    if stage_name == CHIMERA_PREP:
        task_list.append(
            ChimeraProteinLigPrepTask(theargs.latest_weekly, theargs))

    if stage_name == 'extsubmission':
        extfac = ExternalDataSubmissionFactory(theargs.latest_weekly, theargs)
        task_list.extend(extfac.get_external_data_submissions())

    if stage_name == 'evaluation':
        # use util function call to get all evaluation tasks
        # append them to the task_list
        eval_task_factory = EvaluationTaskFactory(theargs.latest_weekly,
                                                  theargs)
        task_list.extend(eval_task_factory.get_evaluation_tasks())

    # Truthiness check replaces ``len(task_list) is 0`` which compared
    # integers by identity instead of by value.
    if not task_list:
        raise NotImplementedError('uh oh no tasks for ' + stage_name +
                                  ' stage')

    return task_list
Esempio n. 15
0
class TestCelppRunner(unittest.TestCase):
    """Tests celpprunner command line script

       The class level constants below are read from task objects
       created with a placeholder base path of '/foo'.  Tests use them
       to build expected stage directory paths and expected task names.
    """
    # parameters object shared by all task objects created below
    param = D3RParameters()

    # blastnfilter task: directory name and task name
    blast = BlastNFilterTask('/foo', param)
    BLAST_DIR_NAME = blast.get_dir_name()
    BLAST_NAME = blast.get_name()

    # data import task: directory name and task name
    data = DataImportTask('/foo', param)
    IMPORT_DIR_NAME = data.get_dir_name()
    IMPORT_NAME = data.get_name()

    # make blast database task: directory name and task name
    makedb = MakeBlastDBTask('/foo', param)
    MAKEDB_DIR_NAME = makedb.get_dir_name()
    MAKEDB_NAME = makedb.get_name()

    # glide task: directory name only
    glide = GlideTask('/foo', param)
    GLIDE_DIR_NAME = glide.get_dir_name()

    # proteinligprep task: directory name only
    prot = ProteinLigPrepTask('/foo', param)
    PROT_DIR_NAME = prot.get_dir_name()

    # autodock vina task: directory name only
    vina = AutoDockVinaTask('/foo', param)
    VINA_DIR_NAME = vina.get_dir_name()

    # challenge data task: directory name and task name
    chall = ChallengeDataTask('/foo', param)
    CHALL_DIR_NAME = chall.get_dir_name()
    CHALL_NAME = chall.get_name()

    # chimera proteinligprep task: directory name only
    chimeraprep = ChimeraProteinLigPrepTask('/foo', param)
    CHIMERAPREP_DIR_NAME = chimeraprep.get_dir_name()

    def setUp(self):
        """No per-test fixtures need to be prepared."""
        return None

    def test_get_lock(self):
        """Acquires the blast lock twice and then releases it

           Checks the lock file exists after the first acquisition
           and no longer exists once the lock is released.
        """
        work_dir = tempfile.mkdtemp()
        try:
            params = D3RParameters()
            params.latest_weekly = work_dir
            lock_path = os.path.join(work_dir, 'celpprunner.blast.lockpid')

            # first acquisition is expected to create the lock file
            lock = celpprunner._get_lock(params, 'blast')
            self.assertTrue(os.path.isfile(lock_path))

            # a second acquisition is expected to succeed as well
            lock = celpprunner._get_lock(params, 'blast')

            lock.release()
            self.assertFalse(os.path.isfile(lock_path))
        finally:
            shutil.rmtree(work_dir)

    def test_get_lock_where_lockfile_exists_and_process_is_running(self):
        """Lock acquisition must fail when lock file holds another pid

           The lock file is pre-populated with the parent process id
           (``os.getppid()``) and ``_get_lock`` is expected to raise
           an exception whose message names that pid.
        """
        temp_dir = tempfile.mkdtemp()
        try:
            theargs = D3RParameters()
            theargs.latest_weekly = temp_dir

            lockfile = os.path.join(temp_dir,
                                    'celpprunner.blast.lockpid')
            pid = str(os.getppid())
            # context manager closes (and flushes) the handle even if
            # the write raises
            with open(lockfile, 'w') as f:
                f.write(pid)

            # getting the lock should fail cause the lock file holds
            # the pid of the parent process
            try:
                celpprunner._get_lock(theargs, 'blast')
            except Exception as e:
                self.assertEqual(str(e), 'celpprunner with pid ' +
                                 pid + ' is running')
            else:
                # fail() lives outside the try body so the AssertionError
                # it raises is not swallowed by the broad except above
                self.fail('Expected Exception')
        finally:
            shutil.rmtree(temp_dir)

    def test_parse_arguments(self):
        """Checks defaults and explicit values from _parse_arguments

           First parses a minimal command line and verifies the default
           value of every option, then parses a command line setting
           every option and verifies the parsed values.
        """
        # minimal command line, everything else should be a default
        theargs = ['--stage', 'blast', 'foo']
        result = celpprunner._parse_arguments('hi', theargs)
        self.assertEqual(result.stage, 'blast')
        self.assertEqual(result.celppdir, 'foo')
        self.assertEqual(result.email, None)
        self.assertEqual(result.summaryemail, None)
        self.assertEqual(result.loglevel, celpprunner.DEFAULT_LOG_LEVEL)
        self.assertEqual(result.blastnfilter, 'blastnfilter.py')
        self.assertEqual(result.proteinligprep, 'proteinligprep.py')
        self.assertEqual(result.evaluation, 'evaluate.py')
        self.assertEqual(result.makeblastdb, 'makeblastdb')
        self.assertEqual(result.genchallenge, 'genchallengedata.py')
        self.assertEqual(result.chimeraprep, 'chimera_proteinligprep.py')
        self.assertEqual(result.skipimportwait, False)
        self.assertEqual(result.importretry, 60)
        self.assertEqual(result.importsleep, 600)
        self.assertEqual(result.rdkitpython, '')
        self.assertEqual(result.postevaluation, 'post_evaluation.py')

        # command line with every option explicitly set
        theargs = ['foo', '--stage', 'dock,glide', '--email', '[email protected],h@h',
                   '--log', 'ERROR',
                   '--blastnfilter', '/bin/blastnfilter.py',
                   '--proteinligprep', '/bin/proteinligprep.py',
                   '--postanalysis', '/bin/postanalysis.py',
                   '--glide', '/bin/glide.py',
                   '--vina', '/bin/vina.py',
                   '--customweekdir',
                   '--evaluation', '/bin/evaluation.py',
                   '--makeblastdb', '/bin/makeblastdb',
                   '--genchallenge', '/bin/gen.py',
                   '--chimeraprep', '/bin/chimeraprep.py',
                   '--skipimportwait',
                   '--importretry', '10',
                   '--importsleep', '30',
                   '--rdkitpython', '/usr/bin',
                   '--summaryemail', 'j@j,g@g',
                   '--postevaluation', '/bin/yo.py']
        result = celpprunner._parse_arguments('hi', theargs)
        self.assertEqual(result.stage, 'dock,glide')
        self.assertEqual(result.celppdir, 'foo')
        self.assertEqual(result.email, '[email protected],h@h')
        self.assertEqual(result.summaryemail, 'j@j,g@g')
        self.assertEqual(result.loglevel, 'ERROR')
        self.assertEqual(result.blastnfilter, '/bin/blastnfilter.py')
        self.assertEqual(result.proteinligprep, '/bin/proteinligprep.py')
        self.assertEqual(result.postanalysis, '/bin/postanalysis.py')
        self.assertEqual(result.glide, '/bin/glide.py')
        self.assertEqual(result.evaluation, '/bin/evaluation.py')
        self.assertEqual(result.customweekdir, True)
        self.assertEqual(result.makeblastdb, '/bin/makeblastdb')
        self.assertEqual(result.vina, '/bin/vina.py')
        self.assertEqual(result.genchallenge, '/bin/gen.py')
        self.assertEqual(result.chimeraprep, '/bin/chimeraprep.py')
        self.assertEqual(result.skipimportwait, True)
        self.assertEqual(result.importretry, 10)
        self.assertEqual(result.importsleep, 30)
        self.assertEqual(result.rdkitpython, '/usr/bin')
        self.assertEqual(result.postevaluation, '/bin/yo.py')

    def test_run_tasks_passing_none_and_empty_list(self):
        """run_tasks should return 3 for None and 2 for an empty list"""
        # assertEqual replaces the deprecated assertEquals alias
        self.assertEqual(celpprunner.run_tasks(None), 3)
        self.assertEqual(celpprunner.run_tasks([]), 2)

    def test_run_one_successful_task(self):
        """run_tasks should return 0 for a single task with no error"""
        success_task = DummyTask(D3RParameters(), 'foo', None, True, None,
                                 None)
        success_task.set_name('dummy')
        # assertEqual replaces the deprecated assertEquals alias
        self.assertEqual(celpprunner.run_tasks([success_task]), 0)

    def test_run_one_fail_task_with_error_message(self):
        """run_tasks should return 1 for a task created with an error

           The error message given to the task should be left as is.
        """
        task = DummyTask(D3RParameters(), 'foo', 'someerror', True, None, None)
        task.set_name('dummy')
        # assertEqual replaces the deprecated assertEquals alias
        self.assertEqual(celpprunner.run_tasks([task]), 1)
        self.assertEqual(task.get_error(), 'someerror')

    def test_run_one_fail_task_with_exception_and_no_message(self):
        """run_tasks should return 1 for a task given an Exception

           The task error should be set to a message that includes
           the text of the exception.
        """
        task = DummyTask(D3RParameters(), 'foo', None, True,
                         None, Exception('hi'))
        task.set_name('dummy')
        # assertEqual replaces the deprecated assertEquals alias
        self.assertEqual(celpprunner.run_tasks([task]), 1)
        self.assertEqual(task.get_error(),
                         'Caught Exception running task: hi')

    def test_run_two_tasks_success(self):
        """run_tasks should return 0 and run every entry in the list

           The same task object is put in the list twice so its
           run count is expected to be 2.
        """
        task = DummyTask(D3RParameters(), 'foo', None, True, None, None)
        task.set_name('dummy')
        task_list = [task, task]

        # assertEqual replaces the deprecated assertEquals alias
        self.assertEqual(celpprunner.run_tasks(task_list), 0)
        self.assertEqual(task._run_count, 2)

    def test_run_two_tasks_second_task_has_error(self):
        """run_tasks should return 1 when the second task has an error

           Both tasks are expected to have been run once.
        """
        task = DummyTask(D3RParameters(), 'foo', None, True, None, None)
        task.set_name('dummy')

        task_two = DummyTask(D3RParameters(), 'foo', None, True,
                             None, Exception('hi'))
        task_two.set_name('dummy')

        task_list = [task, task_two]

        # assertEqual replaces the deprecated assertEquals alias
        self.assertEqual(celpprunner.run_tasks(task_list), 1)
        self.assertEqual(task._run_count, 1)
        self.assertEqual(task_two._run_count, 1)
        self.assertEqual(task_two.get_error(),
                         'Caught Exception running task: hi')

    def test_run_two_tasks_first_task_has_error(self):
        """run_tasks should return 1 when the first task has an error

           The second task is still expected to have been run once.
        """
        task = DummyTask(D3RParameters(), 'foo', None, True, None,
                         Exception('hi'))
        task.set_name('dummy')

        task_two = DummyTask(D3RParameters(), 'foo', None, True, None,
                             None)
        task_two.set_name('dummy')

        task_list = [task, task_two]

        # assertEqual replaces the deprecated assertEquals alias
        self.assertEqual(celpprunner.run_tasks(task_list), 1)
        self.assertEqual(task.get_error(),
                         'Caught Exception running task: hi')

        self.assertEqual(task._run_count, 1)
        self.assertEqual(task_two._run_count, 1)

    def test_get_set_of_email_address_from_email_flags(self):
        """Checks addresses gathered from email and summaryemail

           Covers neither attribute set, only email set, only
           summaryemail set, and both set.
        """
        # NOTE(review): the address literals below look like they were
        # redacted or obfuscated (inputs and expected values do not
        # line up) -- confirm them against the upstream test.
        gather = celpprunner._get_set_of_email_address_from_email_flags
        params = D3RParameters()

        # nothing set
        self.assertEqual(gather(params), None)

        # email set
        params.email = '*****@*****.**'
        self.assertEqual(gather(params), ['*****@*****.**'])

        # email set w dup
        params.email = '[email protected],[email protected],[email protected]'
        self.assertEqual(gather(params), ['*****@*****.**', '*****@*****.**'])

        # summary set only
        params.email = None
        params.summaryemail = '*****@*****.**'
        self.assertEqual(gather(params), ['*****@*****.**'])

        # summary w dups
        params.summaryemail = '[email protected],[email protected],[email protected]'
        self.assertEqual(gather(params), ['*****@*****.**', '*****@*****.**'])

        # both set
        params.email = '[email protected],[email protected]'
        self.assertEqual(gather(params),
                         ['*****@*****.**', '*****@*****.**', '*****@*****.**'])

    def test_get_task_list_for_stage_with_invalid_stage_name(self):
        """None, empty and unknown stage names raise NotImplementedError

           The exception text is compared via ``str(e)`` because the
           ``message`` attribute does not exist on Python 3 exceptions.
        """
        try:
            celpprunner.get_task_list_for_stage(D3RParameters(), None)
            self.fail('Expected exception')
        except NotImplementedError as e:
            self.assertEqual(str(e), 'stage_name is None')

        try:
            celpprunner.get_task_list_for_stage(D3RParameters(), '')
            self.fail('Expected exception')
        except NotImplementedError as e:
            self.assertEqual(str(e), 'uh oh no tasks for  stage')

        try:
            celpprunner.get_task_list_for_stage(D3RParameters(), 'foo')
            self.fail('Expected exception')
        except NotImplementedError as e:
            self.assertEqual(str(e), 'uh oh no tasks for foo stage')

    def test_get_task_list_for_stage_with_valid_stages(self):
        """Each single task stage yields one task in the expected dir

           For every stage name listed below exactly one task should be
           returned and its directory should be the stage directory
           name under the ``latest_weekly`` path.
        """
        params = D3RParameters()
        params.latest_weekly = 'foo'

        # (stage name, expected directory name) checked in this order
        expected = [('blast', TestCelppRunner.BLAST_DIR_NAME),
                    ('proteinligprep', TestCelppRunner.PROT_DIR_NAME),
                    ('import', TestCelppRunner.IMPORT_DIR_NAME),
                    ('glide', TestCelppRunner.GLIDE_DIR_NAME),
                    ('vina', TestCelppRunner.VINA_DIR_NAME),
                    ('challengedata', TestCelppRunner.CHALL_DIR_NAME),
                    ('chimeraprep', TestCelppRunner.CHIMERAPREP_DIR_NAME)]

        for stage, dir_name in expected:
            task_list = celpprunner.get_task_list_for_stage(params, stage)
            # assertEqual replaces the deprecated assertEquals alias
            self.assertEqual(len(task_list), 1)
            self.assertEqual(task_list[0].get_dir(),
                             os.path.join('foo', dir_name))

    def test_get_task_list_for_stage_createchallenge(self):
        """CREATE_CHALLENGE stage should yield four tasks in order

           Expected order is makedb, import, blast and challenge data.
        """
        work_dir = tempfile.mkdtemp()
        try:
            params = D3RParameters()
            params.latest_weekly = work_dir
            tasks = celpprunner.get_task_list_for_stage(
                params, celpprunner.CREATE_CHALLENGE)
            self.assertEqual(len(tasks), 4)

            expected_names = (TestCelppRunner.MAKEDB_NAME,
                              TestCelppRunner.IMPORT_NAME,
                              TestCelppRunner.BLAST_NAME,
                              TestCelppRunner.CHALL_NAME)
            for position, expected in enumerate(expected_names):
                self.assertEqual(tasks[position].get_name(), expected)

        finally:
            shutil.rmtree(work_dir)

    def test_get_task_list_for_stage_for_scoring_stage_with_nonefound(self):
        """Checks message raised for evaluation stage on an empty dir

           The exception text is compared via ``str(e)`` because the
           ``message`` attribute does not exist on Python 3 exceptions.
        """
        temp_dir = tempfile.mkdtemp()
        try:
            params = D3RParameters()
            params.latest_weekly = temp_dir
            # NOTE(review): this test passes silently when no exception
            # is raised -- confirm whether a self.fail() belongs here
            try:
                celpprunner.get_task_list_for_stage(params, 'evaluation')
            except NotImplementedError as e:
                self.assertEqual(str(e),
                                 'uh oh no tasks for evaluation stage')
        finally:
            shutil.rmtree(temp_dir)

    def test_get_task_list_for_stage_for_scoring_stage_with_onefound(self):
        """Evaluation stage with one completed docking dir

           A completed blast task dir and a completed glide docking
           dir are created; a single task named 'glide.evaluation'
           is expected.
        """
        work_dir = tempfile.mkdtemp()
        try:
            params = D3RParameters()

            # completed blastnfilter stage
            blast = BlastNFilterTask(work_dir, params)
            blast.create_dir()
            blast_complete = os.path.join(blast.get_dir(),
                                          D3RTask.COMPLETE_FILE)
            open(blast_complete, 'a').close()

            params.latest_weekly = work_dir

            # completed glide docking stage
            glide_dir = os.path.join(
                work_dir, EvaluationTaskFactory.DOCKSTAGE_PREFIX + 'glide')
            os.mkdir(glide_dir)
            glide_complete = os.path.join(glide_dir, D3RTask.COMPLETE_FILE)
            open(glide_complete, 'a').close()

            tasks = celpprunner.get_task_list_for_stage(params, 'evaluation')
            self.assertEqual(len(tasks), 1)
            self.assertEqual(tasks[0].get_name(), 'glide.evaluation')
        finally:
            shutil.rmtree(work_dir)

    def test_get_task_list_for_stage_for_scoring_stage_with_twofound(self):
        """Evaluation stage with two completed docking dirs

           A completed blast task dir plus completed 'glide' and 'fred'
           docking dirs are created; two tasks are expected.
        """
        work_dir = tempfile.mkdtemp()
        try:
            params = D3RParameters()

            # completed blastnfilter stage
            blast = BlastNFilterTask(work_dir, params)
            blast.create_dir()
            blast_complete = os.path.join(blast.get_dir(),
                                          D3RTask.COMPLETE_FILE)
            open(blast_complete, 'a').close()

            params.latest_weekly = work_dir

            # completed docking stages, created in this order
            for docker in ('glide', 'fred'):
                dock_dir = os.path.join(
                    work_dir, EvaluationTaskFactory.DOCKSTAGE_PREFIX + docker)
                os.mkdir(dock_dir)
                open(os.path.join(dock_dir, D3RTask.COMPLETE_FILE),
                     'a').close()

            tasks = celpprunner.get_task_list_for_stage(params, 'evaluation')
            self.assertEqual(len(tasks), 2)
        finally:
            shutil.rmtree(work_dir)

    def test_run_stages_no_weekly_datasetfound(self):
        """run_stages on an empty celpp dir is expected to return 0"""
        work_dir = tempfile.mkdtemp()
        try:
            params = D3RParameters()
            params.celppdir = work_dir
            exit_code = celpprunner.run_stages(params)
            self.assertEqual(exit_code, 0)
        finally:
            shutil.rmtree(work_dir)

    def test_run_stages_invalid_stage(self):
        """Checks message raised by run_stages for unknown stage 'foo'

           The exception text is compared via ``str(e)`` because the
           ``message`` attribute does not exist on Python 3 exceptions.
        """
        temp_dir = tempfile.mkdtemp()
        try:
            theargs = D3RParameters()
            theargs.celppdir = temp_dir
            theargs.stage = 'foo'
            os.mkdir(os.path.join(temp_dir, '2015'))
            os.mkdir(os.path.join(temp_dir, '2015', 'dataset.week.1'))
            # NOTE(review): this test passes silently when no exception
            # is raised -- confirm whether a self.fail() belongs here
            try:
                celpprunner.run_stages(theargs)
            except NotImplementedError as e:
                self.assertEqual(str(e), 'uh oh no tasks for foo stage')
        finally:
            shutil.rmtree(temp_dir)

    def test_run_stages_blast_stage_data_import_missing(self):
        """run_stages for blast should return 1 without a data import

           Only the makedb stage directory with a 'complete' file is
           created under the week directory.
        """
        temp_dir = tempfile.mkdtemp()
        try:
            theargs = D3RParameters()
            theargs.celppdir = temp_dir
            theargs.stage = 'blast'
            os.mkdir(os.path.join(temp_dir, '2015'))
            os.mkdir(os.path.join(temp_dir, '2015', 'dataset.week.1'))
            makedb_dir = os.path.join(temp_dir, '2015', 'dataset.week.1',
                                      TestCelppRunner.MAKEDB_DIR_NAME)
            os.makedirs(makedb_dir)
            open(os.path.join(makedb_dir, 'complete'), 'a').close()
            # assertEqual replaces the deprecated assertEquals alias
            self.assertEqual(celpprunner.run_stages(theargs), 1)

        finally:
            shutil.rmtree(temp_dir)

    def test_run_stages_blast(self):
        """run_stages for blast should return 0 when prereqs complete

           The makedb and import stage directories are created with
           'complete' files and the external programs are replaced
           with 'echo' and 'true'.
        """
        work_dir = tempfile.mkdtemp()

        try:
            params = D3RParameters()
            params.celppdir = os.path.join(work_dir)
            params.stage = 'blast'
            params.pdbdb = '/pdbdb'

            week_dir = os.path.join(work_dir, '2015', 'dataset.week.1')
            # mark makedb then import stage as complete
            for stage_dir_name in (TestCelppRunner.MAKEDB_DIR_NAME,
                                   TestCelppRunner.IMPORT_DIR_NAME):
                stage_dir = os.path.join(week_dir, stage_dir_name)
                os.makedirs(stage_dir)
                open(os.path.join(stage_dir, 'complete'), 'a').close()

            params.blastnfilter = 'echo'
            params.postanalysis = 'true'
            exit_code = celpprunner.run_stages(params)
            self.assertEqual(exit_code, 0)

        finally:
            shutil.rmtree(work_dir)

    def test_run_stages_blast_has_error(self):
        """run_stages for blast is expected to return 1

           A makedb directory holding an 'error' file is created
           directly under the celpp dir and the week directory is
           left empty.
        """
        work_dir = tempfile.mkdtemp()

        try:
            params = D3RParameters()
            params.celppdir = os.path.join(work_dir)
            params.stage = 'blast'

            makedb_dir = os.path.join(work_dir,
                                      TestCelppRunner.MAKEDB_DIR_NAME)
            os.mkdir(makedb_dir)
            open(os.path.join(makedb_dir, 'error'), 'a').close()

            year_dir = os.path.join(work_dir, '2015')
            os.mkdir(year_dir)
            os.mkdir(os.path.join(year_dir, 'dataset.week.1'))

            exit_code = celpprunner.run_stages(params)
            self.assertEqual(exit_code, 1)

        finally:
            shutil.rmtree(work_dir)

    def test_run_stages_challenge_and_proteinligprep_no_error(self):
        """challengedata,proteinligprep stages should return 0

           The blast stage directory is created with a 'complete'
           file and the external programs are replaced with 'echo'.
        """
        work_dir = tempfile.mkdtemp()

        try:
            params = D3RParameters()
            params.pdbdb = '/pdbdb'
            params.celppdir = os.path.join(work_dir)
            params.stage = 'challengedata,proteinligprep'
            params.proteinligprep = 'echo'
            params.genchallenge = 'echo'

            # mark blast stage as complete
            blast_dir = os.path.join(work_dir, '2015', 'dataset.week.1',
                                     TestCelppRunner.BLAST_DIR_NAME)
            os.makedirs(blast_dir)
            open(os.path.join(blast_dir, 'complete'), 'a').close()

            exit_code = celpprunner.run_stages(params)
            self.assertEqual(exit_code, 0)
        finally:
            shutil.rmtree(work_dir)

    def test_run_stages_blast_and_proteinligprep_blast_has_error(self):
        """blast,proteinligprep stages are expected to return 1

           The import stage directory under the week directory
           holds an error file.
        """
        work_dir = tempfile.mkdtemp()

        try:
            params = D3RParameters()
            params.celppdir = os.path.join(work_dir)
            params.stage = 'blast,proteinligprep'
            params.blastnfilter = 'echo'
            params.proteinligprep = 'echo'

            # makedb dir with complete file directly under celpp dir
            makedb_dir = os.path.join(work_dir,
                                      TestCelppRunner.MAKEDB_DIR_NAME)
            os.mkdir(makedb_dir)
            open(os.path.join(makedb_dir, 'complete'), 'a').close()

            # import stage under the week dir flagged with an error file
            import_dir = os.path.join(work_dir, '2015', 'dataset.week.1',
                                      TestCelppRunner.IMPORT_DIR_NAME)
            os.makedirs(import_dir)
            open(os.path.join(import_dir, D3RTask.ERROR_FILE), 'a').close()

            exit_code = celpprunner.run_stages(params)
            self.assertEqual(exit_code, 1)

        finally:
            shutil.rmtree(work_dir)

    def test_run_stages_makedb_blast_chall_proteinligprep_glide_no_error(self):
        """Runs makedb,blast,challengedata,proteinligprep,glide stages

           The import stage is marked complete, a small gzip file is
           used for ``pdbsequrl`` and the external programs are
           replaced with 'echo' and 'true'.  run_stages should return 0.
        """
        temp_dir = tempfile.mkdtemp()

        try:
            theargs = D3RParameters()
            theargs.pdbdb = '/pdbdb'
            theargs.celppdir = os.path.join(temp_dir)

            theargs.stage = 'makedb,blast,challengedata,proteinligprep,glide'

            d_import_dir = os.path.join(temp_dir, '2015', 'dataset.week.1',
                                        TestCelppRunner.IMPORT_DIR_NAME)
            os.makedirs(d_import_dir)
            open(os.path.join(d_import_dir,
                              D3RTask.COMPLETE_FILE), 'a').close()

            fakegz = os.path.join(temp_dir, 'fake.gz')

            # bytes literal is needed for a binary mode gzip handle on
            # Python 3; context manager closes handle even on error
            with gzip.open(fakegz, 'wb') as f:
                f.write(b'hello\n')

            theargs.pdbsequrl = 'file://'+fakegz

            theargs.makeblastdb = 'echo'
            theargs.blastnfilter = 'echo'
            theargs.postanalysis = 'true'
            theargs.proteinligprep = 'echo'
            theargs.glide = 'echo'
            theargs.genchallenge = 'echo'
            self.assertEqual(celpprunner.run_stages(theargs), 0)

        finally:
            shutil.rmtree(temp_dir)

    def test_run_stages_makedb_through_glide(self):
        """This should test the following stages will run
           makedb,import,blast,challengedata,proteinligprep,
           chimeraprep,glide,vina

           A small gzip file stands in for the downloaded files and
           the external programs are replaced with 'echo' and 'true'.
           run_stages should return 0.
        """
        temp_dir = tempfile.mkdtemp()

        try:
            theargs = D3RParameters()
            theargs.pdbdb = '/pdbdb'
            theargs.celppdir = os.path.join(temp_dir)

            theargs.stage = 'makedb,import,blast,challengedata,proteinligprep,' \
                            'chimeraprep,glide,vina'

            d_import_dir = os.path.join(temp_dir, '2015', 'dataset.week.1',
                                        TestCelppRunner.IMPORT_DIR_NAME)
            os.makedirs(d_import_dir)
            open(os.path.join(d_import_dir,
                              D3RTask.COMPLETE_FILE), 'a').close()

            fakegz = os.path.join(temp_dir, 'fake.gz')

            # bytes literal is needed for a binary mode gzip handle on
            # Python 3; context manager closes handle even on error
            with gzip.open(fakegz, 'wb') as f:
                f.write(b'hello\n')

            theargs.pdbsequrl = 'file://' + fakegz
            theargs.pdbfileurl = 'file://' + fakegz

            theargs.compinchi = 'file://' + fakegz
            theargs.version = '1.0.0'
            theargs.makeblastdb = 'echo'
            theargs.blastnfilter = 'echo'
            theargs.postanalysis = 'true'
            theargs.proteinligprep = 'echo'
            theargs.glide = 'echo'
            theargs.vina = 'echo'
            theargs.genchallenge = 'echo'
            theargs.chimeraprep = 'echo'
            self.assertEqual(celpprunner.run_stages(theargs), 0)

        finally:
            shutil.rmtree(temp_dir)

    def test_get_task_list_for_stage_extsubmission(self):
        """extsubmission stage on an empty week dir should raise

           NotImplementedError is the expected exception.
        """
        work_dir = tempfile.mkdtemp()
        try:
            params = D3RParameters()
            params.pdbdb = '/pdbdb'
            params.latest_weekly = work_dir
            params.stage = 'extsubmission'
            try:
                celpprunner.get_task_list_for_stage(params, 'extsubmission')
            except NotImplementedError:
                pass
            else:
                self.fail('expected NotImplementedError')

        finally:
            shutil.rmtree(work_dir)

    def test_get_task_list_for_stage_postevaluation(self):
        """POST_EVAL stage should always yield one postevaluation task

           Checked with no email attributes set, with email set and
           with both email and summaryemail set.
        """
        work_dir = tempfile.mkdtemp()
        try:
            params = D3RParameters()
            params.latest_weekly = work_dir
            params.stage = celpprunner.POST_EVAL

            def check_single_postevaluation_task():
                # exactly one task named 'postevaluation' is expected
                tasks = celpprunner.get_task_list_for_stage(params,
                                                            params.stage)
                self.assertEqual(len(tasks), 1)
                self.assertEqual(tasks[0].get_name(), 'postevaluation')

            # no email attributes set
            check_single_postevaluation_task()

            # try this time with email set
            params.email = '[email protected],[email protected]'
            check_single_postevaluation_task()

            # try this time with both set
            params.email = '[email protected],[email protected]'
            params.summaryemail = '[email protected],[email protected]'
            check_single_postevaluation_task()
        finally:
            shutil.rmtree(work_dir)

    def test_run_stages_createweekdir_set(self):
        """createweekdir should create the dir for the current week

           The empty stage makes run_stages raise NotImplementedError;
           afterwards the week directory derived from today's date
           must exist under the celpp dir.
        """
        temp_dir = tempfile.mkdtemp()
        try:
            theargs = D3RParameters()
            theargs.celppdir = os.path.join(temp_dir)
            theargs.createweekdir = True
            theargs.stage = ''
            d = date.today()
            celp_week = util.get_celpp_week_of_year_from_date(d)
            # any normal return is a failure, so the return value is
            # not compared and fail() is kept in the else clause
            try:
                celpprunner.run_stages(theargs)
            except NotImplementedError:
                pass
            else:
                self.fail('Expected NotImplementedError')

            expected_dir = os.path.join(temp_dir, str(celp_week[1]),
                                        'dataset.week.' +
                                        str(celp_week[0]))
            # assertEqual replaces the deprecated assertEquals alias
            self.assertEqual(os.path.isdir(expected_dir), True)

        finally:
            shutil.rmtree(temp_dir)

    def test_run_stages_customweekdir_set(self):
        """run_stages with customweekdir set and an empty stage

           NotImplementedError is the expected outcome.
        """
        temp_dir = tempfile.mkdtemp()
        try:
            theargs = D3RParameters()
            theargs.celppdir = temp_dir
            theargs.customweekdir = True
            theargs.createweekdir = True
            theargs.stage = ''
            # any normal return is a failure, so the return value is
            # not compared and fail() is kept in the else clause
            try:
                celpprunner.run_stages(theargs)
            except NotImplementedError:
                pass
            else:
                self.fail('Expected NotImplementedError')

        finally:
            shutil.rmtree(temp_dir)

    def test_main_success(self):
        """main() should return 0 for blast stage with prereqs complete

           The makedb and import stage directories are created with
           'complete' files and the external programs are replaced
           with 'echo' and 'true' on the command line.
        """
        work_dir = tempfile.mkdtemp()

        try:
            cmdline = ['celpprunner.py', '--stage',
                       'blast', '--pdbdb', '/pdbdb',
                       '--blastnfilter', 'echo',
                       '--postanalysis', 'true',
                       work_dir]

            week_dir = os.path.join(work_dir, '2015', 'dataset.week.1')
            # mark makedb then import stage as complete
            for stage_dir_name in (TestCelppRunner.MAKEDB_DIR_NAME,
                                   TestCelppRunner.IMPORT_DIR_NAME):
                stage_dir = os.path.join(week_dir, stage_dir_name)
                os.makedirs(stage_dir)
                open(os.path.join(stage_dir, 'complete'), 'a').close()

            exit_code = celpprunner.main(cmdline)
            self.assertEqual(exit_code, 0)

        finally:
            shutil.rmtree(work_dir)

    def test_main_where_run_stages_raises_error(self):
        """main() is expected to return 2 for stage 'foo'

           The celpp directory given on the command line
           does not exist.
        """
        work_dir = tempfile.mkdtemp()

        try:
            missing_dir = os.path.join(work_dir, 'notexistdir')
            cmdline = ['celpprunner.py', '--stage', 'foo', missing_dir]
            exit_code = celpprunner.main(cmdline)
            self.assertEqual(exit_code, 2)

        finally:
            shutil.rmtree(work_dir)

    def tearDown(self):
        """No per-test cleanup is needed."""
        return None
Esempio n. 16
0
    def test_get_uploadable_files(self):
        """Checks get_uploadable_files() as files are added to task dir

           Files are added one at a time and after each addition the
           number of uploadable files and the presence of the new file
           in the list are verified.  Membership is checked with
           assertIn so a missing file is reported as a test failure
           instead of a ValueError from ``list.index``.
        """
        temp_dir = tempfile.mkdtemp()
        try:
            params = D3RParameters()
            task = ProteinLigPrepTask(temp_dir, params)
            # try with no dir
            self.assertEqual(task.get_uploadable_files(), [])

            # try with empty dir
            task.create_dir()
            self.assertEqual(task.get_uploadable_files(), [])

            # try with final log
            final_log = os.path.join(task.get_dir(),
                                     ProteinLigPrepTask.FINAL_LOG)
            open(final_log, 'a').close()
            flist = task.get_uploadable_files()
            self.assertEqual(len(flist), 1)
            self.assertIn(final_log, flist)

            # try with pbid folder that is empty
            pbdid = os.path.join(task.get_dir(), '4abc')
            os.mkdir(pbdid)
            flist = task.get_uploadable_files()
            self.assertEqual(len(flist), 1)

            # try with pbdid folder with ligand.mae
            ligand = os.path.join(pbdid, 'ligand.mae')
            open(ligand, 'a').close()
            flist = task.get_uploadable_files()
            self.assertEqual(len(flist), 2)
            self.assertIn(ligand, flist)

            # try with pbdid folder with LMCSS.maegz
            LMCSS = os.path.join(pbdid, 'LMCSS.maegz')
            open(LMCSS, 'a').close()
            flist = task.get_uploadable_files()
            self.assertEqual(len(flist), 3)
            self.assertIn(LMCSS, flist)

            # try with pbdid folder with SMCSS.maegz
            SMCSS = os.path.join(pbdid, 'SMCSS.maegz')
            open(SMCSS, 'a').close()
            flist = task.get_uploadable_files()
            self.assertEqual(len(flist), 4)
            self.assertIn(SMCSS, flist)

            # try with pbdid folder with hiResApo.maegz
            hiResApo = os.path.join(pbdid, 'hiResApo.maegz')
            open(hiResApo, 'a').close()
            flist = task.get_uploadable_files()
            self.assertEqual(len(flist), 5)
            self.assertIn(hiResApo, flist)

            # try with pbdid folder with hiResHolo.maegz
            hiResHolo = os.path.join(pbdid, 'hiResHolo.maegz')
            open(hiResHolo, 'a').close()
            flist = task.get_uploadable_files()
            self.assertEqual(len(flist), 6)
            self.assertIn(hiResHolo, flist)

            # add error out files and try with second pbdid folder
            # with ligand.mae
            errfile = os.path.join(task.get_dir(), 'proteinligprep.py.stderr')
            open(errfile, 'a').close()
            outfile = os.path.join(task.get_dir(), 'proteinligprep.py.stdout')
            open(outfile, 'a').close()

            pbdtwo = os.path.join(task.get_dir(), '3zaa')
            os.mkdir(pbdtwo)
            ligandtwo = os.path.join(pbdtwo, 'ligand.mae')
            open(ligandtwo, 'a').close()
            flist = task.get_uploadable_files()
            self.assertEqual(len(flist), 9)
            for expected_file in (errfile, outfile, ligand, ligandtwo,
                                  hiResApo, LMCSS, final_log, hiResHolo,
                                  SMCSS):
                self.assertIn(expected_file, flist)

        finally:
            shutil.rmtree(temp_dir)
Esempio n. 17
0
class EvaluationTaskFactory(object):
    """Factory class to generate EvaluationTask objects

       The directory handed to the constructor (or set_path())
       is scanned for sub directories named with the docking
       stage prefix.  An EvaluationTask is built for each of
       them, except those ending with the web data suffix,
       and kept only if it reports it can run.
    """
    # Task instance used here only to look up the proteinligprep
    # stage number; DOCKSTAGE is defined as that stage plus one.
    prep = ProteinLigPrepTask('/foo', D3RParameters())
    DOCKSTAGE = prep.get_stage() + 1
    DOCKSTAGE_PREFIX = (D3RTask.STAGE_DIRNAME_PREFIX +
                        '.' + str(DOCKSTAGE) + '.')
    SCORING_SUFFIX = 'evaluation'
    WEB_DATA_SUFFIX = 'webdata'

    def __init__(self, path, theargs):
        """Constructor
        :param path: directory to examine for docking tasks
        :param theargs: arguments passed on to the created tasks
        """
        self.set_path(path)
        self.set_args(theargs)

    def set_args(self, theargs):
        """Stores the arguments object
        :param theargs: arguments to set
        """
        self._args = theargs

    def get_args(self):
        """Gets the arguments object
        :returns: value given to constructor or set_args()
        """
        return self._args

    def set_path(self, path):
        """Stores the path examined for docking tasks
        :param path: directory to examine
        """
        self._path = path

    def get_path(self):
        """Gets the path examined for docking tasks
        :returns: value given to constructor or set_path()
        """
        return self._path

    def _get_participant_database(self):
        """Creates `ParticipantDatabase` from the participant list
           csv file reported by a `DataImportTask` built with
           this factory's path and args
        :returns: ParticipantDatabase
        """
        csvfile = DataImportTask(
            self.get_path(), self.get_args()).get_participant_list_csv()
        return ParticipantDatabaseFromCSVFactory(
            csvfile).get_participant_database()

    def _get_value_of_replytoaddress(self):
        """Reads replytoaddress attribute off of `get_args()`
        :returns: value of replytoaddress, or None if the attribute
                  lookup raises AttributeError
        """
        try:
            reply_to = self.get_args().replytoaddress
        except AttributeError:
            logger.debug('replytoaddress not set in get_args()')
            return None
        return reply_to

    def get_evaluation_tasks(self):
        """Generate EvaluationTasks

           Every directory under get_path() whose name starts
           with DOCKSTAGE_PREFIX and does not end with
           WEB_DATA_SUFFIX is treated as a docking task.  An
           EvaluationTask is created for each one and added to
           the result if its can_run() returns True.
           :raises PathNotDirectoryError: if get_path() is not a directory
           :return: list of EvaluationTask objects or empty list if none found
        """
        path = self.get_path()
        logger.debug('Examining ' + path + ' for docking tasks')
        if not os.path.isdir(path):
            raise PathNotDirectoryError(path + ' is not a directory')

        entries = os.listdir(path)

        # one emailer is shared by every task returned
        emailer = EvaluationEmailer(self._get_participant_database(),
                                    self._get_value_of_replytoaddress())

        prefix = EvaluationTaskFactory.DOCKSTAGE_PREFIX
        web_suffix = EvaluationTaskFactory.WEB_DATA_SUFFIX
        runnable_tasks = []

        for entry in entries:
            logger.debug('Checking if ' + entry + ' is a docking task')
            if not os.path.isdir(os.path.join(path, entry)):
                continue
            if not entry.startswith(prefix):
                continue
            if entry.endswith(web_suffix):
                logger.debug('Skipping ' + entry + ' due to suffix')
                continue

            # entry is a docking directory; the task name is whatever
            # follows the stage prefix
            docktask = D3RTask(path, self.get_args())
            docktask.set_stage(EvaluationTaskFactory.DOCKSTAGE)
            docktask.set_name(entry[len(prefix):])

            eval_name = (docktask.get_name() + '.' +
                         EvaluationTaskFactory.SCORING_SUFFIX)
            stask = EvaluationTask(path, eval_name, docktask,
                                   self.get_args())

            if stask.can_run():
                logger.debug('Adding task ' + stask.get_name())
                stask.set_evaluation_emailer(emailer)
                runnable_tasks.append(stask)
                continue

            # task is not runnable, just note why
            if stask.get_error() is None:
                logger.debug(stask.get_name() + ' cannot be' +
                             ' added, no error though')
                continue
            logger.debug(stask.get_name() + ' cannot be' +
                         ' added : ' + stask.get_error())

        return runnable_tasks
Esempio n. 18
0
    def test_can_run(self):
        # Steps ProteinLigPrepTask.can_run() through the states of the
        # challengedata task and of its own task directory, checking
        # the return value and get_error() after every step.
        temp_dir = tempfile.mkdtemp()
        try:
            theargs = D3RParameters()

            # nothing created yet, challengedata reported as notfound
            task = ProteinLigPrepTask(temp_dir, theargs)
            self.assertEqual(task.can_run(), False)
            self.assertEqual(task.get_error(),
                             'challengedata task has notfound status')

            # challengedata has a start file only
            chall = ChallengeDataTask(temp_dir, theargs)
            chall.create_dir()
            chall_start = os.path.join(chall.get_dir(), D3RTask.START_FILE)
            with open(chall_start, 'a'):
                pass
            task = ProteinLigPrepTask(temp_dir, theargs)
            self.assertEqual(task.can_run(), False)
            self.assertEqual(task.get_error(),
                             'challengedata task has start status')

            # challengedata has an error file
            chall_error = os.path.join(chall.get_dir(), D3RTask.ERROR_FILE)
            with open(chall_error, 'a'):
                pass
            task = ProteinLigPrepTask(temp_dir, theargs)
            self.assertEqual(task.can_run(), False)
            self.assertEqual(task.get_error(),
                             'challengedata task has error status')

            # error file removed and complete file added to challengedata
            os.remove(chall_error)
            chall_complete = os.path.join(chall.get_dir(),
                                          D3RTask.COMPLETE_FILE)
            with open(chall_complete, 'a'):
                pass
            task = ProteinLigPrepTask(temp_dir, theargs)
            self.assertEqual(task.can_run(), True)
            self.assertEqual(task.get_error(), None)

            # proteinligprep directory exists already
            task = ProteinLigPrepTask(temp_dir, theargs)
            task.create_dir()
            self.assertEqual(task.can_run(), False)
            expected_error = (task.get_dir_name() +
                              ' already exists and status is unknown')
            self.assertEqual(task.get_error(), expected_error)

            # proteinligprep directory has a complete file
            task = ProteinLigPrepTask(temp_dir, theargs)
            task_complete = os.path.join(task.get_dir(),
                                         D3RTask.COMPLETE_FILE)
            with open(task_complete, 'a'):
                pass
            self.assertEqual(task.can_run(), False)
            self.assertEqual(task.get_error(), None)

        finally:
            shutil.rmtree(temp_dir)