Beispiel #1
0
    def test_create_readme(self):
        """Verify _create_readme copies the summary text into the readme.

        Writes a single line to the path returned by
        get_blastnfilter_summary_file(), generates the readme in the
        temporary directory and checks that the readme exists and
        contains a line starting with that text.
        """
        temp_dir = tempfile.mkdtemp()
        try:
            params = D3RParameters()
            params.version = '1.0'
            task = ChallengeDataTask(temp_dir, params)
            blast = BlastNFilterTask(temp_dir, params)
            blast.create_dir()

            # Context managers close the handles even when a write or a
            # later assertion raises.
            with open(blast.get_blastnfilter_summary_file(), 'w') as f:
                f.write('hello there\n')

            task._create_readme(temp_dir)
            readme = os.path.join(temp_dir,
                                  ChallengeDataTask.README_TXT_FILE)
            self.assertTrue(os.path.isfile(readme))

            # any() stops at the first matching line, like the old
            # found/break loop, without the leftover debug print.
            with open(readme, 'r') as f:
                found = any(re.match('^hello there.*$', line)
                            for line in f)
            self.assertTrue(found)
        finally:
            shutil.rmtree(temp_dir)
Beispiel #2
0
    def test_create_readme_version_empty_summary(self):
        """Verify the readme reports the version with an empty summary.

        Creates an empty file at the path returned by
        get_blastnfilter_summary_file(), generates the readme and checks
        that it contains a 'Celpprunner version: 1.0' line.
        """
        temp_dir = tempfile.mkdtemp()
        try:
            params = D3RParameters()
            params.version = '1.0'
            task = ChallengeDataTask(temp_dir, params)
            blast = BlastNFilterTask(temp_dir, params)
            blast.create_dir()
            sfile = blast.get_blastnfilter_summary_file()
            # create the summary file with no content
            open(sfile, 'a').close()

            task._create_readme(temp_dir)
            readme = os.path.join(temp_dir,
                                  ChallengeDataTask.README_TXT_FILE)
            self.assertTrue(os.path.isfile(readme))

            # The dot is escaped so only a literal '1.0' satisfies the
            # check; the with-block closes the file on any failure.
            with open(readme, 'r') as f:
                found = any(
                    re.match(r'^Celpprunner version: 1\.0.*$', line)
                    for line in f)
            self.assertTrue(found)
        finally:
            shutil.rmtree(temp_dir)
Beispiel #3
0
def generate_reports(theargs):
    """Generates reports

    Writes the `BLASTNFILTER_SUMMARY_CSV` file into `theargs.outdir`
    (creating that directory if needed).  The file starts with a header
    row and then gets one row per week found under each year of
    `theargs.celppdir`, taken from that week's blastnfilter summary.

    :param theargs: parameters object; `outdir` and `celppdir` are read
                    here and the object is also passed to every
                    BlastNFilterTask created
    :raises Exception: if `theargs.outdir` is None
    """
    # Validate first so a missing --outdir fails before any scanning.
    if theargs.outdir is None:
        raise Exception('--outdir must be set')

    celpp_years = util.get_all_celpp_years(theargs.celppdir)

    if not os.path.isdir(theargs.outdir):
        os.makedirs(theargs.outdir)

    csv_path = os.path.join(theargs.outdir, BLASTNFILTER_SUMMARY_CSV)
    # The with-block closes the csv even if a summary lookup raises.
    with open(csv_path, 'w') as f:
        f.write('Week #, Year, Complexes, Dockable complexes, '
                'Dockable monomers, '
                'Targets Found\n')
        for year in celpp_years:
            logger.info('Examining year ' + year)
            year_dir = os.path.join(theargs.celppdir, year)
            for week in util.get_all_celpp_weeks(year_dir):
                logger.debug('Examining week ' + week)
                the_dir = os.path.join(year_dir,
                                       util.DATA_SET_WEEK_PREFIX + week)
                blast = BlastNFilterTask(the_dir, theargs)
                summary = blast.get_blastnfilter_summary()
                f.write(summary.get_csv() + '\n')
Beispiel #4
0
    def test_get_task_list_for_stage_for_scoring_stage_with_twofound(self):
        """Two complete docking directories should give two tasks.

        Marks blastnfilter complete, creates 'glide' and 'fred' docking
        stage directories each holding a complete file, then requests
        the task list for the 'evaluation' stage.
        """
        work_dir = tempfile.mkdtemp()
        try:
            d3r_params = D3RParameters()
            blast = BlastNFilterTask(work_dir, d3r_params)
            blast.create_dir()
            blast_done = os.path.join(blast.get_dir(),
                                      D3RTask.COMPLETE_FILE)
            open(blast_done, 'a').close()
            d3r_params.latest_weekly = work_dir

            # one completed docking stage directory per algorithm
            for algo in ('glide', 'fred'):
                dock_dir = os.path.join(
                    work_dir,
                    EvaluationTaskFactory.DOCKSTAGE_PREFIX + algo)
                os.mkdir(dock_dir)
                dock_done = os.path.join(dock_dir, D3RTask.COMPLETE_FILE)
                open(dock_done, 'a').close()

            found_tasks = celpprunner.get_task_list_for_stage(
                d3r_params, 'evaluation')
            self.assertEqual(len(found_tasks), 2)
        finally:
            shutil.rmtree(work_dir)
Beispiel #5
0
    def test_run_fails_cause_genchallenge_fails(self):
        """run() should report an error when genchallenge exits with 1.

        Uses 'false' as the genchallenge program and checks the error
        message plus the error, stderr and stdout files in the task
        directory.
        """
        work_dir = tempfile.mkdtemp()
        try:
            d3r_params = D3RParameters()
            d3r_params.genchallenge = 'false'
            d3r_params.pdbdb = '/foo'
            d3r_params.version = '1'

            blast = BlastNFilterTask(work_dir, d3r_params)
            blast.create_dir()
            blast_done = os.path.join(blast.get_dir(),
                                      D3RTask.COMPLETE_FILE)
            open(blast_done, 'a').close()

            challenge = ChallengeDataTask(work_dir, d3r_params)
            challenge.run()

            expected_error = ('Non zero exit code: 1 received. '
                              'Standard out: '
                              ' Standard error: ')
            self.assertEqual(challenge.get_error(), expected_error)

            # error marker plus the captured stderr/stdout files
            for fname in (D3RTask.ERROR_FILE,
                          'false.stderr',
                          'false.stdout'):
                produced = os.path.join(challenge.get_dir(), fname)
                self.assertEqual(os.path.isfile(produced), True)
        finally:
            shutil.rmtree(work_dir)
Beispiel #6
0
    def _create_readme(self, path):
        """Creates readme.txt file for task

        Writes `ChallengeDataTask.README_BODY`, formatted with the
        version, week number and year, to
        `ChallengeDataTask.README_TXT_FILE` under `path`.  If the
        blastnfilter summary file exists its lines are appended.

        :param path: directory in which the readme file is written
        """
        ver = 'Unknown'
        try:
            ver = self.get_args().version
        except AttributeError:
            logger.warning('Version unset using Unknown')

        readme = os.path.join(path, ChallengeDataTask.README_TXT_FILE)
        # with-blocks close both files even if formatting or copying
        # raises part way through.
        with open(readme, 'w') as f:
            f.write(ChallengeDataTask.README_BODY.format(
                version=ver,
                week=self._week_num,
                year=self._year))

            blast = BlastNFilterTask(self.get_path(), self.get_args())
            summary_file = blast.get_blastnfilter_summary_file()

            # append summary.txt file
            if os.path.isfile(summary_file):
                with open(summary_file, 'r') as sumfile:
                    for line in sumfile:
                        f.write(line)
Beispiel #7
0
 def test_run_with_can_run_already_set_false(self):
     """Calls run() on a task whose _can_run flag was preset to False."""
     d3r_params = D3RParameters()
     d3r_params.pdbdb = '/pdbdb'
     d3r_params.postanalysis = 'false'
     d3r_params.blastnfilter = 'false'

     task = BlastNFilterTask(None, d3r_params)
     task._can_run = False
     task.run()
Beispiel #8
0
    def test_blastnfilter_summary_week_and_year(self):
        """Checks week, year and csv of a summary for a week 4 2018 path."""
        task = BlastNFilterTask('/foo', D3RParameters())
        summary_dir = ('/foo/2018/dataset.week.4'
                       '/' + task.get_dir_name())
        summary = BlastNFilterSummary(summary_dir)

        week = summary.get_week_number()
        self.assertEqual(week, '4')
        year = summary.get_year()
        self.assertEqual(year, '2018')
        csv_row = summary.get_csv()
        self.assertEqual(csv_row, '4,2018,0,0,0,0')
Beispiel #9
0
    def test_run_success_with_ftp_upload(self):
        """run() should finish without error when a file transfer is set.

        Marks blastnfilter complete, attaches an FtpFileTransfer backed
        by a mock connection, creates the three data import tsv files
        with placeholder content and then verifies that run() leaves a
        complete file and no error file.
        """
        temp_dir = tempfile.mkdtemp()
        try:
            script = self.create_gen_challenge_script(temp_dir)
            params = D3RParameters()
            params.genchallenge = script
            params.pdbdb = '/foo'
            params.version = '1'

            blastnfilter = BlastNFilterTask(temp_dir, params)
            blastnfilter.create_dir()
            open(os.path.join(blastnfilter.get_dir(), D3RTask.COMPLETE_FILE),
                 'a').close()

            chall = ChallengeDataTask(temp_dir, params)
            mockftp = D3RParameters()
            mockftp.put = Mock(side_effect=[3, 5])
            ftp = FtpFileTransfer(None)
            ftp.set_remote_challenge_dir('/challenge')
            ftp.set_connection(mockftp)
            chall.set_file_transfer(ftp)

            dimport = DataImportTask(temp_dir, params)
            dimport.create_dir()

            # One loop replaces three copies of open/write/flush/close;
            # the with-block closes each file even if the write fails.
            tsv_files = ((dimport.get_crystalph_tsv, 'crystal'),
                         (dimport.get_nonpolymer_tsv, 'nonpoly'),
                         (dimport.get_sequence_tsv, 'seq'))
            for get_tsv_path, content in tsv_files:
                with open(get_tsv_path(), 'w') as f:
                    f.write(content)

            chall.run()
            self.assertIsNone(chall.get_error())
            # verify test files get created
            errfile = os.path.join(chall.get_dir(),
                                   D3RTask.ERROR_FILE)
            self.assertFalse(os.path.isfile(errfile))

            compfile = os.path.join(chall.get_dir(),
                                    D3RTask.COMPLETE_FILE)
            self.assertTrue(os.path.isfile(compfile))
            ftp.disconnect()

        finally:
            shutil.rmtree(temp_dir)
Beispiel #10
0
 def test_get_blastnfilter_summary(self):
     """Checks get_blastnfilter_summary on a task in a fresh temp dir

     The returned summary is expected to report 0 complexes.
     """
     work_dir = tempfile.mkdtemp()
     try:
         task = BlastNFilterTask(work_dir, D3RParameters())
         complexes = task.get_blastnfilter_summary().get_complexes()
         self.assertEqual(complexes, 0)
     finally:
         shutil.rmtree(work_dir)
Beispiel #11
0
    def __init__(self, path, args):
        """Sets up the challengedata task

        :param path: passed to the parent constructor and used to build
                     a BlastNFilterTask for stage lookup
        :param args: passed along with `path` in both places
        """
        super(ChallengeDataTask, self).__init__(path, args)
        self.set_name('challengedata')

        # This task's stage is one higher than the BlastNFilter stage
        blast_stage = BlastNFilterTask(path, args).get_stage()
        self.set_stage(blast_stage + 1)

        self.set_status(D3RTask.UNKNOWN_STATUS)
        self._challenge_tarball_filename = None

        # NOTE(review): the week number is derived from get_path() but
        # the year from get_dir() -- confirm the asymmetry is intended
        task_path = self.get_path()
        self._week_num = util.get_celpp_week_number_from_path(task_path)
        task_dir = self.get_dir()
        self._year = util.get_celpp_year_from_path(task_dir)
Beispiel #12
0
    def test_get_evaluation_tasks_on_dir_with_lower_stages_dirs(self):
        """No evaluation tasks expected with only earlier stage dirs.

        Creates just the data import and blastnfilter directories and
        checks that get_evaluation_tasks() returns an empty list.
        """
        temp_dir = tempfile.mkdtemp()
        try:
            params = D3RParameters()
            dataimport = DataImportTask(temp_dir, params)
            blast = BlastNFilterTask(temp_dir, params)
            os.mkdir(os.path.join(temp_dir, dataimport.get_dir_name()))
            os.mkdir(os.path.join(temp_dir, blast.get_dir_name()))

            stf = EvaluationTaskFactory(temp_dir, params)
            task_list = stf.get_evaluation_tasks()
            # assertEquals is a deprecated alias; assertEqual is the
            # supported spelling
            self.assertEqual(len(task_list), 0)
        finally:
            shutil.rmtree(temp_dir)
Beispiel #13
0
 def test_constructor(self):
     """Checks the values a newly constructed BlastNFilterTask reports."""
     task = BlastNFilterTask('ha', D3RParameters())

     # identity of the task
     self.assertEqual(task.get_name(), 'blastnfilter')
     self.assertEqual(task.get_path(), 'ha')
     self.assertEqual(task.get_stage(), 3)

     # initial state
     self.assertEqual(task.get_status(), D3RTask.UNKNOWN_STATUS)
     self.assertEqual(task.get_error(), None)
Beispiel #14
0
    def can_run(self):
        """Determines if task can actually run

           This method first verifies the docking task
           has `D3RTask.COMPLETE_STATUS` for
           status.  It then verifies the blastnfilter task also
           has `D3RTask.COMPLETE_STATUS` for status.  Finally the
           method verifies this task does not already exist.
           If a required task is not complete, or this task exists
           without being complete, self.set_error() is called with
           information about the issue.  No error is set when this
           task is already complete.
           :return: True if can run otherwise False
        """
        self._can_run = False
        self._error = None

        # check docking task
        self._docktask.update_status_from_filesystem()
        dock_status = self._docktask.get_status()
        if dock_status != D3RTask.COMPLETE_STATUS:
            logger.info('Cannot run ' + self.get_name() + ' task ' +
                        'because ' + self._docktask.get_name() + ' task' +
                        ' has a status of ' + dock_status)
            self.set_error(self._docktask.get_name() + ' task has ' +
                           dock_status + ' status')
            return False

        # check blast
        blastnfilter = BlastNFilterTask(self._path, self._args)
        blastnfilter.update_status_from_filesystem()
        blast_status = blastnfilter.get_status()
        if blast_status != D3RTask.COMPLETE_STATUS:
            # Spacing matches the docking message above; previously the
            # words ran together ("...blastnfiltertaskhas a status of")
            logger.info('Cannot run ' + self.get_name() + ' task ' +
                        'because ' + blastnfilter.get_name() + ' task' +
                        ' has a status of ' + blast_status)
            self.set_error(blastnfilter.get_name() + ' task has ' +
                           blast_status + ' status')
            return False

        # check this task is not complete and does not exist
        self.update_status_from_filesystem()
        if self.get_status() == D3RTask.COMPLETE_STATUS:
            # already done: not runnable, but not an error either
            logger.debug("No work needed for " + self.get_name() + " task")
            return False

        if self.get_status() != D3RTask.NOTFOUND_STATUS:
            logger.warning(self.get_name() + " task was already " +
                           "attempted, but there was a problem")
            self.set_error(self.get_dir_name() + ' already exists and ' +
                           'status is ' + self.get_status())
            return False

        self._can_run = True
        return True
Beispiel #15
0
 def test_run_fails_cause_genchallenge_not_set(self):
     """run() should set 'genchallenge not set' when it is not given.

     Also checks the task directory and its error file get created.
     """
     work_dir = tempfile.mkdtemp()
     try:
         d3r_params = D3RParameters()
         d3r_params.version = '1'

         blast = BlastNFilterTask(work_dir, d3r_params)
         blast.create_dir()
         blast_done = os.path.join(blast.get_dir(), D3RTask.COMPLETE_FILE)
         open(blast_done, 'a').close()

         challenge = ChallengeDataTask(work_dir, d3r_params)
         challenge.run()
         self.assertEqual(challenge.get_error(), 'genchallenge not set')

         # the task directory and the error marker should both exist
         dir_exists = os.path.isdir(challenge.get_dir())
         self.assertEqual(dir_exists, True)
         error_marker = os.path.join(challenge.get_dir(),
                                     D3RTask.ERROR_FILE)
         self.assertEqual(os.path.isfile(error_marker), True)
     finally:
         shutil.rmtree(work_dir)
Beispiel #16
0
    def test_run_fails_cause_genchallenge_is_not_found(self):
        """run() should report the failed command when the program is missing.

        Points genchallenge at '/bin/doesnotexist' and checks both the
        error message and the presence of the error file.
        """
        work_dir = tempfile.mkdtemp()
        try:
            d3r_params = D3RParameters()
            d3r_params.genchallenge = '/bin/doesnotexist'
            d3r_params.pdbdb = '/foo'
            d3r_params.version = '1'

            blast = BlastNFilterTask(work_dir, d3r_params)
            blast.create_dir()
            blast_done = os.path.join(blast.get_dir(),
                                      D3RTask.COMPLETE_FILE)
            open(blast_done, 'a').close()

            challenge = ChallengeDataTask(work_dir, d3r_params)
            challenge.run()

            # expected message is assembled piece by piece after run()
            actual_error = challenge.get_error()
            command = ('Caught Exception trying to run ' +
                       '/bin/doesnotexist --candidatedir ' +
                       blast.get_dir() + ' --pdbdb ' +
                       '/foo --outdir ')
            out_dir = (challenge.get_dir() +
                       '/' + challenge.get_celpp_challenge_data_dir_name())
            expected_error = (command + out_dir +
                              ' : [Errno 2] No such file or directory')
            self.assertEqual(actual_error, expected_error)

            # the error marker file should have been written
            error_marker = os.path.join(challenge.get_dir(),
                                        D3RTask.ERROR_FILE)
            self.assertEqual(os.path.isfile(error_marker), True)
        finally:
            shutil.rmtree(work_dir)
Beispiel #17
0
    def test_can_run(self):
        """Steps can_run() through each blastnfilter and own-task state.

        The scenarios build on one another inside a single temporary
        directory, so their order matters.
        """
        def touch(directory, filename):
            """Creates an empty file (if missing) and returns its path."""
            file_path = os.path.join(directory, filename)
            open(file_path, 'a').close()
            return file_path

        work_dir = tempfile.mkdtemp()
        try:
            d3r_params = D3RParameters()

            # blastnfilter directory does not exist yet
            challenge = ChallengeDataTask(work_dir, d3r_params)
            self.assertEqual(challenge.can_run(), False)
            self.assertEqual(challenge.get_error(),
                             'blastnfilter task has notfound status')

            # blastnfilter has a start file only
            blast = BlastNFilterTask(work_dir, d3r_params)
            blast.create_dir()
            touch(blast.get_dir(), D3RTask.START_FILE)
            challenge = ChallengeDataTask(work_dir, d3r_params)
            self.assertEqual(challenge.can_run(), False)
            self.assertEqual(challenge.get_error(),
                             'blastnfilter task has start status')

            # blastnfilter has an error file
            blast_error = touch(blast.get_dir(), D3RTask.ERROR_FILE)
            challenge = ChallengeDataTask(work_dir, d3r_params)
            self.assertEqual(challenge.can_run(), False)
            self.assertEqual(challenge.get_error(),
                             'blastnfilter task has error status')

            # error file removed and complete file added
            os.remove(blast_error)
            touch(blast.get_dir(), D3RTask.COMPLETE_FILE)
            challenge = ChallengeDataTask(work_dir, d3r_params)
            self.assertEqual(challenge.can_run(), True)
            self.assertEqual(challenge.get_error(), None)

            # challengedata task directory already exists
            challenge = ChallengeDataTask(work_dir, d3r_params)
            challenge.create_dir()
            self.assertEqual(challenge.can_run(), False)
            self.assertEqual(challenge.get_error(),
                             challenge.get_dir_name() +
                             ' already exists and status is unknown')

            # challengedata task directory holds a complete file
            challenge = ChallengeDataTask(work_dir, d3r_params)
            touch(challenge.get_dir(), D3RTask.COMPLETE_FILE)
            self.assertEqual(challenge.can_run(), False)
            self.assertEqual(challenge.get_error(), None)

        finally:
            shutil.rmtree(work_dir)
Beispiel #18
0
    def test_run_with_exception(self):
        """run() with blastnfilter set to 'falseasdfasdf' should error.

        Expects error status and an error message starting with 'Caught'.
        """
        work_dir = tempfile.mkdtemp()

        try:
            d3r_params = D3RParameters()
            d3r_params.pdbdb = '/pdbdb'
            d3r_params.postanalysis = 'true'
            d3r_params.blastnfilter = 'falseasdfasdf'

            task = BlastNFilterTask(work_dir, d3r_params)
            task._can_run = True
            task.run()

            self.assertEqual(task.get_status(), D3RTask.ERROR_STATUS)
            starts_with_caught = task.get_error().startswith('Caught')
            self.assertEqual(starts_with_caught, True)
            self.assertNotEqual(task.get_error(), None)
        finally:
            shutil.rmtree(work_dir)
Beispiel #19
0
 def test_update_status_from_filesystem(self):
     """Hands a BlastNFilterTask to the shared status check helper."""
     blast = BlastNFilterTask(None, D3RParameters())
     test_task.try_update_status_from_filesystem(self, blast)
Beispiel #20
0
def main():
    """Command line entry point.

    Builds the help text, parses the arguments in sys.argv[1:] with
    _parse_arguments, sets up logging and calls run_stages.  If
    run_stages raises, the exception is logged and the process exits
    with code 2.
    """
    # Task objects are created only so their directory names, stage
    # numbers and file name constants can be substituted into the help
    # text below.
    # NOTE(review): `p` is not defined in this function -- presumably a
    # module-level parameters object; verify against the rest of the file
    blasttask = BlastNFilterTask('', p)
    dataimport = DataImportTask('', p)
    challenge = ChallengeDataTask('', p)
    glide = GlideTask('', p)
    makedb = MakeBlastDBTask('', p)
    prot = ProteinLigPrepTask('', p)
    vina = AutoDockVinaTask('', p)
    chimeraprep = ChimeraProteinLigPrepTask('', p)
    # Help text; the {placeholders} are filled in by the .format() call
    # that follows the closing quotes.
    desc = """
              Version {version}

              Runs the 9 stages (makedb, import, blast, challengedata,
              proteinligprep, {chimeraprep}, extsubmission, glide, vina, &
              evaluation) of CELPP processing pipeline
              (http://www.drugdesigndata.org)

              CELPP processing pipeline relies on a set of directories
              with specific structure. The pipeline runs a set of stages
              Each stage has a numerical value and a name. The numerical
              value denotes order and the stage name identifies separate
              tasks to run in the stage.

              The filesystem structure of the stage is:

              stage.<stage number>.<task name>

              The stage(s) run are defined via the required --stage flag.

              To run multiple stages serially just pass a comma delimited
              list to the --stage flag. Example: --stage import,blast

              NOTE:  When running multiple stages serially the program will
                     not run subsequent stages if a task in a stage fails.
                     Also note order matters, ie putting blast,import will
                     cause celpprunner.py to run blast stage first.

              This program drops a pid lockfile
              (celpprunner.<stage>.lockpid) in celppdir to prevent duplicate
              invocation.

              When run, this program will examine the stage and see
              if work can be done.  If stage is complete or previous
              steps have not completed, the program will exit silently.
              If previous steps have failed or current stage already
              exists in an error or uncomplete state then program will
              report the error via email using addresses set in --email
              flag. Errors will also be reported via stderr/stdout.
              The program will also exit with nonzero exit code.

              This program utilizes simple token files to denote stage
              completion.  If within the stage directory there is a:

              '{complete}' file - then stage is done and no other
                                checking is done.

              'error' file - then stage failed.

              'start' file - then stage is running.

              Notification of stage start and end will be sent to
              addresses set via --email flag.

              Unless --customweekdir is set, this program will
              examine the 'celppdir' (last argument passed on
              commandline) to find the latest directory with this path:

              <year>/dataset.week.#

              The program will find the latest <year> and within
              that year the dataset.week.# with highest #.  The output
              directories created will be put within this directory.

              Setting --customweekdir will cause program to use 'celppdir'
              path.

              Setting the --createweekdir flag will instruct this
              program to create a new directory for the current
              celpp week/year before running any stage processing.

              NOTE: CELPP weeks start on Friday and end on Thursday
                    and week # follows ISO8601 rules so week numbers
                    at the end and start of the year are a bit
                    wonky.

              Breakdown of behavior of program is defined by
              value passed with --stage flag:

              If --stage '{createchallenge}'

              This is NOT a stage, but has the same effect as
              calling --stage makedb,import,blast,challengedata
              The four stages that need to run to generate the challenge
              data package.

              If --stage 'makedb'

              In this stage the file {pdb_seqres} is downloaded from
              an ftp site set by --pdbsequrl.
              This file is then gunzipped and NCBI makeblastdb
              (set by --makeblastdb) is run on it to create a blast
              database.  The files are stored in {makeblastdb_dirname}

              If --stage 'import'

              In this stage 4 files are downloaded from urls specified
              by --compinchi and --pdbfileurl flags on the commandline
              into {dataimport_dirname} directory.

              The tsv files are (--pdbfileurl flag sets url to
              download these files from):

              {nonpolymer_tsv}
              {sequence_tsv}
              {crystal_tsv}

              The Components ich file is (--compinchi flag sets base url to
              download this file from):

              {compinchi_ich}

              This stage will just wait and retry if any of the tsv files
              have NOT been updated since the start of the current
              celpp week as determined by a HEAD request. To bypass
              this delay add --skipimportwait flag.  --importsleep denotes
              the time to wait before re-examining the update time of the
              tsv files and --importretry sets number of times to retry
              before giving up.

              If --stage 'blast'

              Verifies {dataimport_dirname} exists and has '{complete}'
              file.  Also verifies {makeblastdb_dirname} exists and has
              '{complete}' file.  If both conditions are met then the
              'blast' stage is run which invokes script set by
              --blastnfilter flag and output stored in
              {blast_dirname}.
              Requires --pdbdb to be set to a directory with valid PDB
              database files.

              Note: --blastnfilter script is killed after time set with
              --blastnfiltertimeout flag.


              If --stage 'challengedata'

              Verifies {blast_dirname} exists and has '{complete}'
              file.  If complete, this stage runs which invokes program
              set in --genchallenge flag to create a challenge dataset
              file.  The --pdbdb flag must also be set when calling this
              stage. If --ftpconfig is set with {challengepath} field then
              this stage will also upload the challenge dataset tarfile
              to the ftp server with path set by {challengepath}.  The
              code will also upload a {latest_txt} file containing name
              of the tarfile to the same destination overwriting any
              {latest_txt} file that already exists.

              Example file for --ftpconfig:

              {host} some.ftp.com
              {user} bob
              {passn} mypass
              {path} /celpp
              {challengepath} /challenge
              {submissionpath} /submissions


              If --stage '{chimeraprep}'

              Verifies {challenge_dirname} exists and has '{complete}'
              file.  If complete, this stage runs which invokes program
              set in --chimeraprep flag to prepare pdb and inchi files
              storing output in {chimeraprep_dirname}.  --pdbdb flag
              must also be set when calling this stage.

              If --stage 'proteinligprep'

              Verifies {challenge_dirname} exists and has '{complete}'
              file.  If complete, this stage runs which invokes program
              set in --proteinligprep flag to prepare pdb and inchi files
              storing output in {proteinligprep_dirname}.  --pdbdb flag
              must also be set when calling this stage.

              If --stage 'extsubmission'

              Connects to server specified by --ftpconfig and downloads
              external docking submissions from {submissionpath} on remote
              server.

              Submissions should be named:

              celpp_weekXX_YYYY_dockedresults_ZZZZ.tar.gz as documented here:

              https://github.com/drugdata/d3r/wiki/Proposed-challenge-docked\
              -results-file-structure

              For each submission a directory named stage.X.ZZZZ.extsubmission
              will be created and uncompressed contents of package will be
              stored in that directory.  If data does not conform properly
              'error' file will be placed in directory denoting failure

              If --stage 'glide'

              Verifies {proteinligprep_dirname} exists and has a '{complete}'
              file within it.  If complete, this stage runs which invokes
              program set in --glide flag to perform docking via glide
              storing output in {glide_dirname}

              If --stage 'vina'

              Verifies {proteinligprep_dirname} exists and has a '{complete}'
              file within it.  If complete, this stage runs which invokes
              program set in --vina flag to perform docking via AutoDock Vina
              storing output in {vina_dirname}

              If --stage 'evaluation'

              Finds all stage.{dockstage}.<algo> directories with '{complete}'
              files in them which do not end in name '{webdata}' and runs
              script set via --evaluation parameter storing the result of
              the script into stage.{evalstage}.<algo>.evaluation. --pdbdb flag
              must also be set when calling this stage.


              """.format(makeblastdb_dirname=makedb.get_dir_name(),
                         dataimport_dirname=dataimport.get_dir_name(),
                         blast_dirname=blasttask.get_dir_name(),
                         challenge_dirname=challenge.get_dir_name(),
                         createchallenge=CREATE_CHALLENGE,
                         proteinligprep_dirname=prot.get_dir_name(),
                         glide_dirname=glide.get_dir_name(),
                         vina_dirname=vina.get_dir_name(),
                         dockstage=str(glide.get_stage()),
                         evalstage=str(glide.get_stage() + 1),
                         complete=blasttask.COMPLETE_FILE,
                         chimeraprep_dirname=chimeraprep.get_dir_name(),
                         chimeraprep=CHIMERA_PREP,
                         compinchi_ich=DataImportTask.COMPINCHI_ICH,
                         pdb_seqres=MakeBlastDBTask.PDB_SEQRES_TXT_GZ,
                         nonpolymer_tsv=DataImportTask.NONPOLYMER_TSV,
                         sequence_tsv=DataImportTask.SEQUENCE_TSV,
                         crystal_tsv=DataImportTask.CRYSTALPH_TSV,
                         webdata=EvaluationTaskFactory.WEB_DATA_SUFFIX,
                         latest_txt=ChallengeDataTask.LATEST_TXT,
                         host=FtpFileTransfer.HOST,
                         user=FtpFileTransfer.USER,
                         passn=FtpFileTransfer.PASS,
                         path=FtpFileTransfer.PATH,
                         challengepath=FtpFileTransfer.CHALLENGEPATH,
                         submissionpath=FtpFileTransfer.SUBMISSIONPATH,
                         version=d3r.__version__)

    # Parse the command line and record program name and version on the
    # resulting arguments object.
    theargs = _parse_arguments(desc, sys.argv[1:])
    theargs.program = sys.argv[0]
    theargs.version = d3r.__version__

    util.setup_logging(theargs)

    # Any exception from the stages is logged with its traceback and
    # turned into exit code 2.
    try:
        run_stages(theargs)
    except Exception:
        logger.exception("Error caught exception")
        sys.exit(2)
Beispiel #21
0
    def test_parse_blastnfilter_output_for_hit_stats(self):
        """Verify the hit stats text counts .txt files in the task dir.

        Starts with an empty task directory and adds .txt files one at a
        time, checking the reported '# txt files found' count after each.
        """
        temp_dir = tempfile.mkdtemp()
        try:
            params = D3RParameters()
            blasttask = BlastNFilterTask(temp_dir, params)

            blasttask.create_dir()

            def add_txt_file(name):
                """Writes a placeholder .txt file into the task directory."""
                # the with-block closes the file even if the write fails
                with open(os.path.join(blasttask.get_dir(), name), 'w') as f:
                    f.write('NEED TO PUT REAL DATA IN HERE\n')

            # no summary.txt file
            self.assertEqual(
                blasttask._parse_blastnfilter_output_for_hit_stats(),
                '\n# txt files found: 0\n\nOutput from ' + 'summary.txt\n')

            add_txt_file('4zyc.txt')
            self.assertEqual(
                blasttask._parse_blastnfilter_output_for_hit_stats(),
                '\n# txt files found: 1\n\nOutput from ' + 'summary.txt\n')

            # For two and three files only substrings are checked.
            # assertIn makes explicit what the bare res.index() calls
            # implied through their ValueError.
            for count, name in ((2, '4qqq.txt'), (3, '4abc.txt')):
                add_txt_file(name)
                res = blasttask._parse_blastnfilter_output_for_hit_stats()\
                    .rstrip('\n')
                self.assertIn('# txt files found: ' + str(count), res)
                self.assertIn('Output from summary.txt', res)

        finally:
            shutil.rmtree(temp_dir)
Beispiel #22
0
    def test_generate_reports_with_one_week_in_one_year(self):
        """Generate reports for a celpp dir holding one year and one week.

        Builds <temp>/2016/dataset.week.10/<blastnfilter dir>/ containing
        a summary file (BlastNFilterTask.SUMMARY_TXT), runs
        celppreports.generate_reports() and verifies that
        blastnfilter.summary.csv is written under outdir with the
        expected header and first data row.
        """
        temp_dir = tempfile.mkdtemp()
        try:
            theargs = D3RParameters()

            theargs.celppdir = temp_dir
            outdir = os.path.join(temp_dir, 'outdir')
            theargs.outdir = outdir

            yeardir = os.path.join(temp_dir, '2016')
            os.mkdir(yeardir)
            weekdir = os.path.join(yeardir, 'dataset.week.10')
            os.mkdir(weekdir)
            blast = BlastNFilterTask(temp_dir, theargs)
            blastdir = os.path.join(weekdir, blast.get_dir_name())
            os.mkdir(blastdir)

            # create dummy summary file; the with block guarantees the
            # handle is closed even if a write fails
            summary_file = os.path.join(blastdir,
                                        BlastNFilterTask.SUMMARY_TXT)
            with open(summary_file, 'w') as f:
                f.write('INPUT SUMMARY\n')
                f.write('  entries:                             221\n')
                f.write('  complexes:                           178\n')
                f.write('  dockable complexes:                   95\n')
                f.write('  monomers:                            145\n')
                f.write('  dockable monomers:                    71\n')
                f.write('  multimers:                            76\n')
                f.write('  dockable multimers:                   24\n\n')

                f.write('FILTERING CRITERIA\n')
                f.write('  No. of query sequences           <=    1\n')
                f.write('  No. of dockable ligands           =    1\n')
                f.write('  Percent identity                 >=    0.95\n')
                f.write('  Percent Coverage                 >=    0.9\n')
                f.write('  No. of hit sequences             <=    4\n')
                f.write('  Structure determination method:        '
                        '  x-ray diffraction\n\n')

                f.write('OUTPUT SUMMARY\n')
                f.write('  Targets found:                        67\n')
                f.write('  Target: 5fz7|Sequences: 1|Hits: 94|Candidates: 17|'
                        'Elected:4|PDBids: 5fz7,5fyz,5a3p,5a1f\n')

            celppreports.generate_reports(theargs)
            self.assertTrue(os.path.isdir(outdir))
            csv_file = os.path.join(outdir, 'blastnfilter.summary.csv')
            self.assertTrue(os.path.isfile(csv_file))

            # read the csv inside a with block so the handle is released
            # before the temp directory is removed
            with open(csv_file, 'r') as f:
                header = f.readline()
                data = f.readline()

            self.assertEqual(
                header, 'Week #, Year, Complexes, Dockable '
                'complexes, Dockable monomers, '
                'Targets Found\n')
            self.assertEqual(data, '10,2016,178,95,71,67\n')

        finally:
            shutil.rmtree(temp_dir)
Beispiel #23
0
    def test_run_with_error(self):
        """Run the task with 'false' as blastnfilter and 'true' as postanalysis.

        Expects the task to end with complete status and no error, the
        complete file and false.stderr/false.stdout to exist in the task
        directory, and no error file to be present.
        """
        work_dir = tempfile.mkdtemp()

        try:
            args = D3RParameters()
            args.blastnfilter = 'false'
            args.postanalysis = 'true'
            args.pdbdb = '/pdbdb'
            task = BlastNFilterTask(work_dir, args)
            task._can_run = True
            task.run()
            self.assertEqual(task.get_status(), D3RTask.COMPLETE_STATUS)
            self.assertEqual(task.get_error(), None)

            # (file name within task dir, whether it should exist)
            expectations = ((D3RTask.COMPLETE_FILE, True),
                            (D3RTask.ERROR_FILE, False),
                            ('false.stderr', True),
                            ('false.stdout', True))
            for file_name, should_exist in expectations:
                candidate = os.path.join(task.get_dir(), file_name)
                self.assertEqual(os.path.isfile(candidate), should_exist)
        finally:
            shutil.rmtree(work_dir)
Beispiel #24
0
    def test_run_fail_due_to_time_but_with_txt_file(self):
        """Run with a 1 second timeout against a script that outlives it.

        The generated blastnfilter script writes a single txt file into
        the task directory and then sleeps for 360 seconds.  The test
        expects the task to still end with complete status and no error,
        the stdout/stderr files for the script to exist, and the email
        log to mention the script, one txt file and the summary.txt
        section.
        """
        temp_dir = tempfile.mkdtemp()
        try:
            params = D3RParameters()
            foo_script = os.path.join(temp_dir, 'foo.py')
            params.blastnfilter = foo_script
            params.postanalysis = '/bin/echo'
            params.pdbdb = '/pdbdb'
            params.blastnfiltertimeout = 1
            blasttask = BlastNFilterTask(temp_dir, params)
            blasttask._can_run = True

            txt_file = os.path.join(blasttask.get_dir(), '4za4.txt')

            # Newlines are double escaped because this text is embedded
            # in a string literal inside the generated script.
            txt_contents = (
                'query, 4za4\\n' + 'ph, 7.4\\n' + 'ligand, 4LU\\n\\n' +
                'inchi, InChI=1S/C22H29N4O9P/c1-10-7-12-16-' +
                '15(11(10)2)22(3,4)5-6-25(16)17-19(23-21(31)' +
                '24-20(17)30)26(12)8-13(27)18(29)14(28)9-35-' +
                '36(32,33)34/h6-7,13-14,18,27-29H,5,8-9H2,1-' +
                '4H3,(H3-,23,24,30,31,32,33,34)/p+1/t13-,14+' + ',18-/m0/s1\\n'
                'largest, 4zz3, 4PP\\n' + 'smallest, 3ax3, 4LP\\n' +
                'holo, 2ax1, XDN\\n' + 'apo, 2ll3, GSS\\n')

            # create fake blastnfilter script that writes the txt file
            # and then sleeps well past the configured timeout
            with open(foo_script, 'w') as f:
                f.write('#! /usr/bin/env python\n\n')
                f.write('import time\n')
                f.write('f = open(\'' + txt_file + '\', \'w\')\n')
                f.write('f.write(\'' + txt_contents + '\\n\')\n')
                f.write('f.flush()\nf.close()\n')
                f.write('time.sleep(360)\n')
            os.chmod(foo_script, stat.S_IRWXU)

            blasttask.run()
            self.assertEqual(blasttask.get_error(), None)
            self.assertEqual(blasttask.get_status(), D3RTask.COMPLETE_STATUS)
            complete_file = os.path.join(blasttask.get_dir(),
                                         D3RTask.COMPLETE_FILE)
            self.assertTrue(os.path.isfile(complete_file))

            std_err_file = os.path.join(blasttask.get_dir(), 'foo.py.stderr')
            self.assertTrue(os.path.isfile(std_err_file))

            std_out_file = os.path.join(blasttask.get_dir(), 'foo.py.stdout')
            self.assertTrue(os.path.isfile(std_out_file))

            # assertIn reports a missing fragment as a test failure with
            # the full log in the message
            res = blasttask.get_email_log().rstrip('\n')
            self.assertIn('/foo.py', res)
            self.assertIn('# txt files found: 1', res)
            self.assertIn('Output from summary.txt', res)
        finally:
            shutil.rmtree(temp_dir)
Beispiel #25
0
    def test_get_txt_files(self):
        """Check get_txt_files() against various task directory contents.

        Covers a missing directory, an empty directory, a directory with
        only summary.txt, only non txt files, and one or more txt files
        with and without summary.txt present.  The assertions show that
        summary.txt and non txt files are not counted.
        """
        temp_dir = tempfile.mkdtemp()
        try:
            params = D3RParameters()
            blast_task = BlastNFilterTask(temp_dir, params)

            def touch(name):
                # create an empty file in the task dir, return its path
                path = os.path.join(blast_task.get_dir(), name)
                open(path, 'a').close()
                return path

            # try missing directory
            self.assertEqual(len(blast_task.get_txt_files()), 0)

            blast_task.create_dir()

            # try empty directory
            self.assertEqual(len(blast_task.get_txt_files()), 0)

            # try where only summary.txt exists
            summary_file = touch('summary.txt')
            self.assertEqual(len(blast_task.get_txt_files()), 0)

            # try non txt files only
            os.remove(summary_file)
            touch('foo.csv')
            self.assertEqual(len(blast_task.get_txt_files()), 0)

            # try 1 txt file
            touch('4vwx.txt')
            self.assertEqual(len(blast_task.get_txt_files()), 1)

            # try 1 summary.txt and 1 txt file
            touch('summary.txt')
            self.assertEqual(len(blast_task.get_txt_files()), 1)

            # try multiple txt files
            touch('5vwx.txt')
            self.assertEqual(len(blast_task.get_txt_files()), 2)

        finally:
            shutil.rmtree(temp_dir)
Beispiel #26
0
    def test_run_with_blast_success_postanalysis_success_no_summary_file(self):
        """Run with 'true' as blastnfilter and /bin/echo as postanalysis.

        Expects complete status with no error, stdout/stderr files for
        both commands, the echoed postanalysis command line to contain
        the --compinchi argument and the blastnfilter task directory,
        and the email log to report zero txt files along with the
        summary.txt section header.
        """
        temp_dir = tempfile.mkdtemp()

        try:
            params = D3RParameters()
            params.blastnfilter = 'true'
            params.postanalysis = '/bin/echo'
            params.pdbdb = '/pdbdb'
            blasttask = BlastNFilterTask(temp_dir, params)
            blasttask._can_run = True
            blasttask.run()
            self.assertEqual(blasttask.get_status(), D3RTask.COMPLETE_STATUS)
            self.assertEqual(blasttask.get_error(), None)
            complete_file = os.path.join(blasttask.get_dir(),
                                         D3RTask.COMPLETE_FILE)
            self.assertTrue(os.path.isfile(complete_file))

            std_err_file = os.path.join(blasttask.get_dir(), 'echo.stderr')
            self.assertTrue(os.path.isfile(std_err_file))

            # verify the file exists before reading it so a missing file
            # is reported as a failed assertion
            std_out_file = os.path.join(blasttask.get_dir(), 'echo.stdout')
            self.assertTrue(os.path.isfile(std_out_file))
            with open(std_out_file, 'r') as f:
                echo_out = f.read().replace('\n', '')

            dataimport = DataImportTask(temp_dir, params)
            self.assertIn('--compinchi ' +
                          os.path.join(temp_dir, dataimport.get_dir_name(),
                                       DataImportTask.COMPINCHI_ICH),
                          echo_out)
            self.assertIn(' ' +
                          os.path.join(temp_dir, blasttask.get_dir_name()),
                          echo_out)

            # output files from the blastnfilter ('true') invocation
            for out_name in ('true.stderr', 'true.stdout'):
                self.assertTrue(
                    os.path.exists(os.path.join(blasttask.get_dir(),
                                                out_name)))

            res = blasttask.get_email_log().rstrip('\n')
            self.assertIn('/bin/echo', res)
            self.assertIn('# txt files found: 0', res)
            self.assertIn('Output from summary.txt', res)
        finally:
            shutil.rmtree(temp_dir)
Beispiel #27
0
    def run(self):
        """Invokes the external evaluation script and records the outcome

           can_run() is expected to have been called successfully
           before this method.  The parent run() is invoked first and
           nothing more is done if the _can_run flag is False.  The
           task ends with an error if either the evaluation or pdbdb
           argument is missing.  Otherwise the evaluation command is
           run with the optional timeout and kill delay arguments, its
           exit code file is written, an evaluation email is attempted
           and finally end() is called
           """
        super(EvaluationTask, self).run()

        if self._can_run is False:
            logger.debug(self.get_dir_name() +
                         ' cannot run cause _can_run flag is False')
            return

        # arguments that must exist: (attribute, debug prefix, error)
        required_args = (
            ('evaluation', 'evaluation set to ', 'evaluation not set'),
            ('pdbdb', 'pdbdb set to ', 'pdbdb not set'),
        )
        for attr_name, debug_prefix, error_msg in required_args:
            try:
                logger.debug(debug_prefix +
                             getattr(self.get_args(), attr_name))
            except AttributeError:
                self.set_error(error_msg)
                self.end()
                return

        # optional timeout, None when not supplied
        try:
            timeout_val = self.get_args().evaluationtimeout
            logger.debug('Setting evaluation timeout to ' + str(timeout_val))
        except AttributeError:
            timeout_val = None

        # optional kill delay, 60 when not supplied
        try:
            kill_delay_val = self.get_args().evaluationtimeoutkilldelay
            logger.debug('Setting evaluation kill delay to ' +
                         str(kill_delay_val))
        except AttributeError:
            kill_delay_val = 60

        blast_task = BlastNFilterTask(self._path, self._args)
        chall_task = ChallengeDataTask(self._path, self._args)
        chall_data_dir = os.path.join(
            chall_task.get_dir(),
            chall_task.get_celpp_challenge_data_dir_name())

        # Command line has the form:
        # <evaluation> --pdbdb <dir> --dockdir <dir> \
        # --blastnfilterdir <dir> --challengedir <dir> --outdir <dir>
        command = ' '.join([self.get_args().evaluation,
                            '--pdbdb', self.get_args().pdbdb,
                            '--dockdir', self._docktask.get_dir(),
                            '--blastnfilterdir', blast_task.get_dir(),
                            '--challengedir', chall_data_dir,
                            '--outdir', self.get_dir()])

        script_name = os.path.basename(self.get_args().evaluation)

        exit_code = self.run_external_command(script_name,
                                              command,
                                              False,
                                              timeout=timeout_val,
                                              kill_delay=kill_delay_val)

        # persist the exit code of the evaluation script
        self._write_evaluate_exitcode_file(exit_code)

        # email is best effort, a failure is logged and noted in the
        # email log
        try:
            self._emailer.send_evaluation_email(self)
            self.append_to_email_log(self._emailer.get_message_log())
        except Exception as ex:
            logger.exception('Caught exception trying to send evaluation '
                             'email')
            self.append_to_email_log('Caught exception trying to send '
                                     'evaluation email ' + str(ex) + '\n')

        # assess the result
        self.end()
Beispiel #28
0
    def test_get_uploadable_files(self):
        """Check get_uploadable_files() as files are added to the task dir.

        Starts with a missing directory (empty list expected), then
        adds the blastnfilter log, dockable file, summary file, two txt
        files and the stderr/stdout files one at a time, verifying the
        list grows by one and contains each new file.
        """
        work_dir = tempfile.mkdtemp()
        try:
            # test where directory doesn't even exist
            args = D3RParameters()
            task = BlastNFilterTask(work_dir, args)
            self.assertEqual(task.get_uploadable_files(), [])

            # test on empty dir
            task.create_dir()

            # test with blastn filter log, which must be the only entry
            log_path = os.path.join(task.get_dir(),
                                    BlastNFilterTask.BLASTNFILTER_LOG)
            open(log_path, 'a').close()
            uploadable = task.get_uploadable_files()
            self.assertEqual(len(uploadable), 1)
            self.assertEqual(uploadable[0], log_path)

            # remaining files in the order they are added: dockable
            # file, summary file, two txt files, stderr and stdout
            created = [log_path]
            remaining = (BlastNFilterTask.DOCKABLE_XSLX,
                         BlastNFilterTask.SUMMARY_TXT,
                         '1fcz.txt',
                         '4asd.txt',
                         'blastnfilter.py.stderr',
                         'blastnfilter.py.stdout')
            for file_name in remaining:
                new_path = os.path.join(task.get_dir(), file_name)
                open(new_path, 'a').close()
                created.append(new_path)
                uploadable = task.get_uploadable_files()
                self.assertEqual(len(uploadable), len(created))
                # index() raises ValueError if the file is absent
                uploadable.index(new_path)

            # final list must contain every file created above
            for path in reversed(created):
                uploadable.index(path)

        finally:
            shutil.rmtree(work_dir)
Beispiel #29
0
    def test_can_run(self):
        """Walk can_run() through upstream and own task directory states.

        Checks the return value and error message when makeblastdb has
        not run or has an error file, when dataimport is missing or has
        an error file, when both upstream tasks are complete, and when
        the blastnfilter directory already exists with and without a
        complete file.
        """
        def touch(path):
            # create an empty marker file
            open(path, 'a').close()

        work_dir = tempfile.mkdtemp()

        try:
            # try where makeblastdb is not complete
            args = D3RParameters()
            blast_task = BlastNFilterTask(work_dir, args)
            self.assertEqual(blast_task.can_run(), False)

            # try where makeblastdb failed
            make_db = MakeBlastDBTask(work_dir, args)
            make_db.create_dir()
            touch(os.path.join(make_db.get_path(),
                               make_db.get_dir_name(),
                               D3RTask.ERROR_FILE))
            self.assertEqual(blast_task.can_run(), False)
            self.assertEqual(blast_task.get_error(),
                             'makeblastdb task has error status')

            # try where data import is not complete
            touch(os.path.join(make_db.get_path(),
                               make_db.get_dir_name(),
                               D3RTask.COMPLETE_FILE))
            self.assertEqual(blast_task.can_run(), False)
            self.assertEqual(blast_task.get_error(),
                             'dataimport task has notfound status')

            # try where data import failed
            data_import = DataImportTask(work_dir, args)
            data_import.create_dir()
            import_error = os.path.join(data_import.get_path(),
                                        data_import.get_dir_name(),
                                        D3RTask.ERROR_FILE)
            touch(import_error)
            self.assertEqual(blast_task.can_run(), False)
            self.assertEqual(blast_task.get_error(),
                             'dataimport task has error status')

            # try where blast can run
            os.remove(import_error)
            touch(os.path.join(data_import.get_dir(),
                               D3RTask.COMPLETE_FILE))
            self.assertEqual(blast_task.can_run(), True)
            self.assertEqual(blast_task.get_error(), None)

            # try where blast exists
            blast_task.create_dir()
            self.assertEqual(blast_task.can_run(), False)
            self.assertEqual(
                blast_task.get_error(),
                blast_task.get_dir_name() +
                ' already exists and status is unknown')

            # try where blast is complete
            touch(os.path.join(blast_task.get_path(),
                               blast_task.get_dir_name(),
                               D3RTask.COMPLETE_FILE))
            self.assertEqual(blast_task.can_run(), False)
            self.assertEqual(blast_task.get_error(), None)

        finally:
            shutil.rmtree(work_dir)
Beispiel #30
0
    def test_run_with_blast_success_useoldseq_and_postanalysis_fail(self):
        """Run with /bin/echo as blastnfilter and a generated postanalysis.

        The generated postanalysis script writes a summary.txt into the
        task directory.  The test expects complete status with no error,
        the echoed blastnfilter command line to carry the expected
        arguments (with --sequencetsv pointing at the old sequence tsv),
        stdout/stderr files for both commands, and the email log to
        include the summary contents and the sequence tsv fallback
        message.
        """
        temp_dir = tempfile.mkdtemp()

        try:
            params = D3RParameters()
            params.blastnfilter = '/bin/echo'
            params.postanalysis = os.path.join(temp_dir, 'foo.py')
            params.pdbdb = '/pdbdb'
            blasttask = BlastNFilterTask(temp_dir, params)
            blasttask._can_run = True

            txt_file = os.path.join(blasttask.get_dir(), 'summary.txt')

            # Newlines are double escaped because this text is embedded
            # in a string literal inside the generated script.
            txt_contents = ('INPUT SUMMARY\\n' + '  sequences:  177\\n' +
                            '  complexes:  149\\n')

            # create fake postanalysis script that writes summary.txt
            with open(params.postanalysis, 'w') as f:
                f.write('#! /usr/bin/env python\n\n')
                f.write('f = open(\'' + txt_file + '\', \'w\')\n')
                f.write('f.write(\'' + txt_contents + '\\n\')\n')
                f.write('f.flush()\nf.close()\n')
            os.chmod(params.postanalysis, stat.S_IRWXU)

            blasttask.run()
            self.assertEqual(blasttask.get_status(), D3RTask.COMPLETE_STATUS)
            self.assertEqual(blasttask.get_error(), None)
            complete_file = os.path.join(blasttask.get_dir(),
                                         D3RTask.COMPLETE_FILE)
            self.assertTrue(os.path.isfile(complete_file))

            std_err_file = os.path.join(blasttask.get_dir(), 'echo.stderr')
            self.assertTrue(os.path.isfile(std_err_file))

            # verify the file exists before reading it so a missing file
            # is reported as a failed assertion
            std_out_file = os.path.join(blasttask.get_dir(), 'echo.stdout')
            self.assertTrue(os.path.isfile(std_out_file))
            with open(std_out_file, 'r') as f:
                echo_out = f.read().replace('\n', '')

            dataimport = DataImportTask(temp_dir, params)
            makeblast = MakeBlastDBTask(temp_dir, params)
            import_dir = os.path.join(temp_dir, dataimport.get_dir_name())

            expected_args = (
                '--nonpolymertsv ' +
                os.path.join(import_dir, DataImportTask.NONPOLYMER_TSV),
                ' --sequencetsv ' +
                os.path.join(import_dir, DataImportTask.OLDSEQUENCE_TSV),
                ' --pdbblastdb ' +
                os.path.join(temp_dir, makeblast.get_dir_name()),
                ' --compinchi ' +
                os.path.join(import_dir, DataImportTask.COMPINCHI_ICH),
                ' --outdir ' +
                os.path.join(temp_dir, blasttask.get_dir_name()),
                ' --crystalpH ' +
                os.path.join(import_dir, DataImportTask.CRYSTALPH_TSV),
                ' --pdbdb /pdbdb ',
            )
            for fragment in expected_args:
                self.assertIn(fragment, echo_out)

            # output files from the postanalysis (foo.py) invocation
            for out_name in ('foo.py.stderr', 'foo.py.stdout'):
                self.assertTrue(
                    os.path.exists(
                        os.path.join(blasttask.get_dir(), out_name)))

            res = blasttask.get_email_log().rstrip('\n')
            self.assertIn('/bin/echo', res)
            self.assertIn('# txt files found: 0', res)
            self.assertIn('Output from summary.txt', res)
            self.assertIn('  sequences:  177', res)
            self.assertIn('  complexes:  149', res)
            self.assertIn(dataimport.get_sequence_tsv() +
                          ' file not found falling back to ' +
                          dataimport.get_oldsequence_tsv(), res)
        finally:
            shutil.rmtree(temp_dir)