def test_run_succeeds(self):
    """Run glide using the ``true`` executable and check its output files.

    A proteinligprep stage directory containing a complete marker is
    staged first; the glide task is then run and must finish with no
    error, no error file, a complete file, and stderr/stdout captures.
    """
    work_dir = tempfile.mkdtemp()
    try:
        params = D3RParameters()
        params.glide = 'true'

        # Stage a proteinligprep directory that is marked complete.
        prep_task = ProteinLigPrepTask(work_dir, params)
        prep_task.create_dir()
        with open(os.path.join(prep_task.get_dir(),
                               D3RTask.COMPLETE_FILE), 'a'):
            pass

        glide = GlideTask(work_dir, params)
        glide.run()
        self.assertIsNone(glide.get_error())

        # test files get created
        expectations = (
            (D3RTask.ERROR_FILE, False),
            (D3RTask.COMPLETE_FILE, True),
            ('true.stderr', True),
            ('true.stdout', True),
        )
        for file_name, should_exist in expectations:
            candidate = os.path.join(glide.get_dir(), file_name)
            self.assertEqual(os.path.isfile(candidate), should_exist)
    finally:
        shutil.rmtree(work_dir)
def test_run_fails_cause_can_run_is_false(self):
    """Run glide with no proteinligprep directory present.

    The task is expected to record the 'notfound' status error.
    """
    work_dir = tempfile.mkdtemp()
    try:
        # return immediately cause can_run is false
        glide = GlideTask(work_dir, D3RParameters())
        glide.run()
        reported = glide.get_error()
        self.assertEqual(reported,
                         'proteinligprep task has notfound status')
    finally:
        shutil.rmtree(work_dir)
def test_run_fails_cause_glide_fails(self):
    """Run glide using the ``false`` executable and check the failure path.

    With a completed proteinligprep directory staged, the glide task
    must report the non zero exit code and leave an error file plus
    stderr/stdout captures in its directory.
    """
    work_dir = tempfile.mkdtemp()
    try:
        params = D3RParameters()
        params.glide = 'false'

        # Stage a proteinligprep directory that is marked complete.
        prep_task = ProteinLigPrepTask(work_dir, params)
        prep_task.create_dir()
        with open(os.path.join(prep_task.get_dir(),
                               D3RTask.COMPLETE_FILE), 'a'):
            pass

        glide = GlideTask(work_dir, params)
        glide.run()
        reported = glide.get_error()
        self.assertEqual(reported,
                         'Non zero exit code: 1 received. Standard out: ' +
                         ' Standard error: ')

        # test file gets created
        for file_name in (D3RTask.ERROR_FILE, 'false.stderr',
                          'false.stdout'):
            candidate = os.path.join(glide.get_dir(), file_name)
            self.assertTrue(os.path.isfile(candidate))
    finally:
        shutil.rmtree(work_dir)
def test_run_fails_cause_glide_is_not_found(self):
    """Run glide with a nonexistent executable path.

    The recorded error must name the full command line that was
    attempted, and an error file must be written.
    """
    work_dir = tempfile.mkdtemp()
    try:
        params = D3RParameters()
        params.glide = '/bin/doesnotexist'

        # Stage a proteinligprep directory that is marked complete.
        prep_task = ProteinLigPrepTask(work_dir, params)
        prep_task.create_dir()
        with open(os.path.join(prep_task.get_dir(),
                               D3RTask.COMPLETE_FILE), 'a'):
            pass

        glide = GlideTask(work_dir, params)
        glide.run()
        reported = glide.get_error()
        expected = ('Caught Exception trying to run ' +
                    '/bin/doesnotexist --structuredir ' +
                    prep_task.get_dir() + ' --outdir ' +
                    glide.get_dir() +
                    ' : [Errno 2] No such file or directory')
        self.assertEqual(reported, expected)

        # test files get created
        error_marker = os.path.join(glide.get_dir(), D3RTask.ERROR_FILE)
        self.assertTrue(os.path.isfile(error_marker))
    finally:
        shutil.rmtree(work_dir)
def test_run_fails_cause_glide_not_set(self):
    """Run glide without the ``glide`` parameter being set.

    The task must report 'glide not set', and both its directory and an
    error file must exist afterwards.
    """
    work_dir = tempfile.mkdtemp()
    try:
        params = D3RParameters()

        # Stage a proteinligprep directory that is marked complete.
        prep_task = ProteinLigPrepTask(work_dir, params)
        prep_task.create_dir()
        with open(os.path.join(prep_task.get_dir(),
                               D3RTask.COMPLETE_FILE), 'a'):
            pass

        glide = GlideTask(work_dir, params)
        glide.run()
        self.assertEqual(glide.get_error(), 'glide not set')

        # test files get created
        self.assertTrue(os.path.isdir(glide.get_dir()))
        error_marker = os.path.join(glide.get_dir(), D3RTask.ERROR_FILE)
        self.assertTrue(os.path.isfile(error_marker))
    finally:
        shutil.rmtree(work_dir)
def test_get_evaluation_tasks_on_with_valid_completed_algo_dir(self):
    """Verify no evaluation task is returned once evaluation is complete.

    A glide task directory and its matching evaluation task directory
    are both created with complete markers; the factory is then
    expected to return an empty task list.
    """
    temp_dir = tempfile.mkdtemp()
    try:
        params = D3RParameters()

        # Docking stage directory marked complete.
        glidetask = GlideTask(temp_dir, params)
        glidetask.create_dir()
        open(os.path.join(glidetask.get_dir(),
                          D3RTask.COMPLETE_FILE), 'a').close()

        # Evaluation directory for that docking stage, also complete.
        etask = EvaluationTask(
            temp_dir,
            glidetask.get_name() + '.' +
            EvaluationTaskFactory.SCORING_SUFFIX,
            glidetask, params)
        etask.create_dir()
        open(os.path.join(etask.get_dir(),
                          D3RTask.COMPLETE_FILE), 'a').close()

        stf = EvaluationTaskFactory(temp_dir, params)
        task_list = stf.get_evaluation_tasks()
        # assertEquals is a deprecated alias removed in Python 3.12.
        self.assertEqual(len(task_list), 0)
    finally:
        shutil.rmtree(temp_dir)
def test_get_uploadable_files(self):
    """Check GlideTask.get_uploadable_files as files are added step by step.

    After each file or directory is created under the task directory the
    returned list is re-read, its length checked, and the new path
    asserted to be present.  Membership is verified with ``assertIn`` so
    a missing path is reported as a test failure rather than surfacing
    as a ``ValueError`` from ``list.index``.
    """
    temp_dir = tempfile.mkdtemp()
    try:
        params = D3RParameters()
        task = GlideTask(temp_dir, params)

        # try with no dir
        self.assertEqual(task.get_uploadable_files(), [])

        # try with empty dir
        task.create_dir()
        self.assertEqual(task.get_uploadable_files(), [])

        # try with final log
        final_log = os.path.join(task.get_dir(), GlideTask.FINAL_LOG)
        open(final_log, 'a').close()
        flist = task.get_uploadable_files()
        self.assertEqual(len(flist), 1)
        self.assertIn(final_log, flist)

        # try with empty pdbid dir: the directory itself adds nothing
        pdbid_dir = os.path.join(task.get_dir(), '6fff')
        os.mkdir(pdbid_dir)
        flist = task.get_uploadable_files()
        self.assertEqual(len(flist), 1)
        self.assertIn(final_log, flist)

        # try with pdbid.txt
        pdbid_txt = os.path.join(task.get_dir(), '6fff.txt')
        open(pdbid_txt, 'a').close()
        flist = task.get_uploadable_files()
        self.assertEqual(len(flist), 2)
        self.assertIn(pdbid_txt, flist)

        # try with <candidate>/<candidate>_dock_pv.maegz for each candidate
        # type; every new file raises the expected count by one.
        docked_files = []
        candidates = ('LMCSS', 'SMCSS', 'hiResApo', 'hiResHolo')
        for expected_count, candidate in enumerate(candidates, start=3):
            candidate_dir = os.path.join(pdbid_dir, candidate)
            os.mkdir(candidate_dir)
            docked = os.path.join(candidate_dir,
                                  candidate + '_dock_pv.maegz')
            open(docked, 'a').close()
            flist = task.get_uploadable_files()
            self.assertEqual(len(flist), expected_count)
            self.assertIn(docked, flist)
            docked_files.append(docked)

        # try with stderr/out files
        errfile = os.path.join(task.get_dir(), 'glidedocking.py.stderr')
        open(errfile, 'a').close()
        outfile = os.path.join(task.get_dir(), 'glidedocking.py.stdout')
        open(outfile, 'a').close()
        flist = task.get_uploadable_files()
        self.assertEqual(len(flist), 8)
        expected_files = [errfile, outfile, final_log]
        expected_files.extend(docked_files)
        expected_files.append(pdbid_txt)
        for expected in expected_files:
            self.assertIn(expected, flist)
    finally:
        shutil.rmtree(temp_dir)
def test_can_run(self):
    """Walk GlideTask.can_run through each upstream and own-state case.

    The proteinligprep directory is moved through missing, started,
    errored and complete states, then the glide directory itself is
    created and finally marked complete.  After every change a fresh
    GlideTask is built and both can_run() and get_error() are checked.
    """
    work_dir = tempfile.mkdtemp()

    def touch(path):
        # Create an empty marker file (or leave an existing one as is).
        with open(path, 'a'):
            pass

    try:
        # no proteinligprep task found so it cannot run
        params = D3RParameters()
        glide = GlideTask(work_dir, params)
        self.assertEqual(glide.can_run(), False)
        self.assertEqual(glide.get_error(),
                         'proteinligprep task has notfound status')

        # proteinligprep filter running
        prep_task = ProteinLigPrepTask(work_dir, params)
        prep_task.create_dir()
        touch(os.path.join(prep_task.get_dir(), D3RTask.START_FILE))
        glide = GlideTask(work_dir, params)
        self.assertEqual(glide.can_run(), False)
        self.assertEqual(glide.get_error(),
                         'proteinligprep task has start status')

        # proteinligprep failed
        prep_error = os.path.join(prep_task.get_dir(), D3RTask.ERROR_FILE)
        touch(prep_error)
        glide = GlideTask(work_dir, params)
        self.assertEqual(glide.can_run(), False)
        self.assertEqual(glide.get_error(),
                         'proteinligprep task has error status')

        # proteinligprep success
        os.remove(prep_error)
        touch(os.path.join(prep_task.get_dir(), D3RTask.COMPLETE_FILE))
        glide = GlideTask(work_dir, params)
        self.assertEqual(glide.can_run(), True)
        self.assertIsNone(glide.get_error())

        # glide task exists already
        glide = GlideTask(work_dir, params)
        glide.create_dir()
        self.assertEqual(glide.can_run(), False)
        self.assertEqual(glide.get_error(),
                         glide.get_dir_name() +
                         ' already exists and status is unknown')

        # glide already complete
        glide = GlideTask(work_dir, params)
        touch(os.path.join(glide.get_dir(), D3RTask.COMPLETE_FILE))
        self.assertEqual(glide.can_run(), False)
        self.assertIsNone(glide.get_error())
    finally:
        shutil.rmtree(work_dir)
def main():
    """Command line entry point.

    Builds the help/description text from the directory names and
    constants of the task classes, parses ``sys.argv`` with
    ``_parse_arguments``, configures logging and then calls
    ``run_stages``.

    Exit behavior:
      * an exception escaping ``run_stages`` is logged and the process
        exits with status 2
      * a nonzero status returned by ``run_stages`` becomes the process
        exit status, matching the help text which states the program
        exits with a nonzero exit code on error
      * on success the function simply returns
    """
    # NOTE(review): `p` is not assigned anywhere in this function; it is
    # presumably a module-level parameters object -- confirm it is defined
    # before main() is called.
    # These task instances are only used to look up directory names,
    # stage numbers and constants for the help text below.
    blasttask = BlastNFilterTask('', p)
    dataimport = DataImportTask('', p)
    challenge = ChallengeDataTask('', p)
    glide = GlideTask('', p)
    makedb = MakeBlastDBTask('', p)
    prot = ProteinLigPrepTask('', p)
    vina = AutoDockVinaTask('', p)
    chimeraprep = ChimeraProteinLigPrepTask('', p)

    # NOTE(review): the text says "9 stages" but the list that follows
    # names ten entries -- confirm which is intended.
    desc = """ Version {version} Runs the 9 stages (makedb, import, blast, challengedata, proteinligprep, {chimeraprep}, extsubmission, glide, vina, & evaluation) of CELPP processing pipeline (http://www.drugdesigndata.org) CELPP processing pipeline relies on a set of directories with specific structure. The pipeline runs a set of stages Each stage has a numerical value and a name. The numerical value denotes order and the stage name identifies separate tasks to run in the stage. The filesystem structure of the stage is: stage.<stage number>.<task name> The stage(s) run are defined via the required --stage flag. To run multiple stages serially just pass a comma delimited list to the --stage flag. Example: --stage import,blast NOTE: When running multiple stages serially the program will not run subsequent stages if a task in a stage fails. Also note order matters, ie putting blast,import will cause celpprunner.py to run blast stage first. This program drops a pid lockfile (celpprunner.<stage>.lockpid) in celppdir to prevent duplicate invocation. When run, this program will examine the stage and see if work can be done. If stage is complete or previous steps have not completed, the program will exit silently. If previous steps have failed or current stage already exists in an error or uncomplete state then program will report the error via email using addresses set in --email flag. Errors will also be reported via stderr/stdout. The program will also exit with nonzero exit code. This program utilizes simple token files to denote stage completion. If within the stage directory there is a: '{complete}' file - then stage is done and no other checking is done. 
'error' file - then stage failed. 'start' file - then stage is running. Notification of stage start and end will be sent to addresses set via --email flag. Unless --customweekdir is set, this program will examine the 'celppdir' (last argument passed on commandline) to find the latest directory with this path: <year>/dataset.week.# The program will find the latest <year> and within that year the dataset.week.# with highest #. The output directories created will be put within this directory. Setting --customweekdir will cause program to use 'celppdir' path. Setting the --createweekdir flag will instruct this program to create a new directory for the current celpp week/year before running any stage processing. NOTE: CELPP weeks start on Friday and end on Thursday and week # follows ISO8601 rules so week numbers at the end and start of the year are a bit wonky. Breakdown of behavior of program is defined by value passed with --stage flag: If --stage '{createchallenge}' This is NOT a stage, but has the same effect as calling --stage makedb,import,blast,challengedata The four stages that need to run to generate the challenge data package. If --stage 'makedb' In this stage the file {pdb_seqres} is downloaded from an ftp site set by --pdbsequrl. This file is then gunzipped and NCBI makeblastdb (set by --makeblastdb) is run on it to create a blast database. The files are stored in {makeblastdb_dirname} If --stage 'import' In this stage 4 files are downloaded from urls specified by --compinchi and --pdbfileurl flags on the commandline into {dataimport_dirname} directory. The tsv files are (--pdbfileurl flag sets url to download these files from): {nonpolymer_tsv} {sequence_tsv} {crystal_tsv} The Components ich file is (--compinchi flag sets base url to download this file from): {compinchi_ich} This stage will just wait and retry if any of the tsv files have NOT been updated since the start of the current celpp week as determined by a HEAD request. 
To bypass this delay add --skipimportwait flag. --importsleep denotes the time to wait before re-examining the update time of the tsv files and --importretry sets number of times to retry before giving up. If --stage 'blast' Verifies {dataimport_dirname} exists and has '{complete}' file. Also verifies {makeblastdb_dirname} exists and has '{complete}' file. If both conditions are met then the 'blast' stage is run which invokes script set by --blastnfilter flag and output stored in {blast_dirname}. Requires --pdbdb to be set to a directory with valid PDB database files. Note: --blastnfilter script is killed after time set with --blastnfiltertimeout flag. If --stage 'challengedata' Verifies {blast_dirname} exists and has '{complete}' file. If complete, this stage runs which invokes program set in --genchallenge flag to create a challenge dataset file. The --pdbdb flag must also be set when calling this stage. If --ftpconfig is set with {challengepath} field then this stage will also upload the challenge dataset tarfile to the ftp server with path set by {challengepath}. The code will also upload a {latest_txt} file containing name of the tarfile to the same destination overwriting any {latest_txt} file that already exists. Example file for --ftpconfig: {host} some.ftp.com {user} bob {passn} mypass {path} /celpp {challengepath} /challenge {submissionpath} /submissions If --stage '{chimeraprep}' Verifies {challenge_dirname} exists and has '{complete}' file. If complete, this stage runs which invokes program set in --chimeraprep flag to prepare pdb and inchi files storing output in {chimeraprep_dirname}. --pdbdb flag must also be set when calling this stage. If --stage 'proteinligprep' Verifies {challenge_dirname} exists and has '{complete}' file. If complete, this stage runs which invokes program set in --proteinligprep flag to prepare pdb and inchi files storing output in {proteinligprep_dirname}. --pdbdb flag must also be set when calling this stage. 
If --stage 'extsubmission' Connects to server specified by --ftpconfig and downloads external docking submissions from {submissionpath} on remote server. Submissions should be named: celpp_weekXX_YYYY_dockedresults_ZZZZ.tar.gz as documented here: https://github.com/drugdata/d3r/wiki/Proposed-challenge-docked\ -results-file-structure For each submission a directory named stage.X.ZZZZ.extsubmission will be created and uncompressed contents of package will be stored in that directory. If data does not conform properly 'error' file will be placed in directory denoting failure If --stage 'glide' Verifies {proteinligprep_dirname} exists and has a '{complete}' file within it. If complete, this stage runs which invokes program set in --glide flag to perform docking via glide storing output in {glide_dirname} If --stage 'vina' Verifies {proteinligprep_dirname} exists and has a '{complete}' file within it. If complete, this stage runs which invokes program set in --vina flag to perform docking via AutoDock Vina storing output in {vina_dirname} If --stage 'evaluation' Finds all stage.{dockstage}.<algo> directories with '{complete}' files in them which do not end in name '{webdata}' and runs script set via --evaluation parameter storing the result of the script into stage.{evalstage}.<algo>.evaluation. --pdbdb flag must also be set when calling this stage. 
""".format(makeblastdb_dirname=makedb.get_dir_name(),
           dataimport_dirname=dataimport.get_dir_name(),
           blast_dirname=blasttask.get_dir_name(),
           challenge_dirname=challenge.get_dir_name(),
           createchallenge=CREATE_CHALLENGE,
           proteinligprep_dirname=prot.get_dir_name(),
           glide_dirname=glide.get_dir_name(),
           vina_dirname=vina.get_dir_name(),
           dockstage=str(glide.get_stage()),
           # evaluation output is described as living one stage after docking
           evalstage=str(glide.get_stage() + 1),
           complete=blasttask.COMPLETE_FILE,
           chimeraprep_dirname=chimeraprep.get_dir_name(),
           chimeraprep=CHIMERA_PREP,
           compinchi_ich=DataImportTask.COMPINCHI_ICH,
           pdb_seqres=MakeBlastDBTask.PDB_SEQRES_TXT_GZ,
           nonpolymer_tsv=DataImportTask.NONPOLYMER_TSV,
           sequence_tsv=DataImportTask.SEQUENCE_TSV,
           crystal_tsv=DataImportTask.CRYSTALPH_TSV,
           webdata=EvaluationTaskFactory.WEB_DATA_SUFFIX,
           latest_txt=ChallengeDataTask.LATEST_TXT,
           host=FtpFileTransfer.HOST,
           user=FtpFileTransfer.USER,
           passn=FtpFileTransfer.PASS,
           path=FtpFileTransfer.PATH,
           challengepath=FtpFileTransfer.CHALLENGEPATH,
           submissionpath=FtpFileTransfer.SUBMISSIONPATH,
           version=d3r.__version__)

    theargs = _parse_arguments(desc, sys.argv[1:])
    theargs.program = sys.argv[0]
    theargs.version = d3r.__version__

    util.setup_logging(theargs)

    try:
        exit_status = run_stages(theargs)
    except Exception:
        logger.exception("Error caught exception")
        sys.exit(2)

    # Propagate a failing status to the shell; previously the value
    # returned by run_stages was discarded and the process exited 0 even
    # when a stage failed.  A zero/None status still just returns.
    if exit_status:
        sys.exit(exit_status)
def get_task_list_for_stage(theargs, stage_name):
    """Factory method that generates a list of tasks for given stage

    Using stage_name get the list of tasks that need to be run.

    :param theargs: parameters set via commandline along with
                    ``theargs.latest_weekly`` which should be set to
                    the base directory where stages will be run
    :param stage_name: Name of stage to run
    :returns: non-empty list of task objects to run for the stage
    :raises NotImplementedError: if ``stage_name`` is None or if no
                                 tasks were generated for the stage
    """
    if stage_name is None:
        raise NotImplementedError('stage_name is None')

    task_list = []

    logger.debug('Getting task list for ' + stage_name)

    # ``theargs.latest_weekly`` is deliberately read only inside the
    # branch that matches, so an unknown stage name still reaches the
    # NotImplementedError below even when that attribute is not set.
    if stage_name == CREATE_CHALLENGE:
        task_list.append(MakeBlastDBTask(theargs.latest_weekly, theargs))
        task_list.append(DataImportTask(theargs.latest_weekly, theargs))
        task_list.append(BlastNFilterTask(theargs.latest_weekly, theargs))
        task_list.append(ChallengeDataTask(theargs.latest_weekly, theargs))

    if stage_name == 'makedb':
        task_list.append(MakeBlastDBTask(theargs.latest_weekly, theargs))

    if stage_name == 'import':
        task_list.append(DataImportTask(theargs.latest_weekly, theargs))

    if stage_name == 'blast':
        task_list.append(BlastNFilterTask(theargs.latest_weekly, theargs))

    if stage_name == 'challengedata':
        task_list.append(ChallengeDataTask(theargs.latest_weekly, theargs))

    if stage_name == 'proteinligprep':
        task_list.append(ProteinLigPrepTask(theargs.latest_weekly, theargs))

    if stage_name == 'glide':
        task_list.append(GlideTask(theargs.latest_weekly, theargs))

    if stage_name == 'vina':
        task_list.append(AutoDockVinaTask(theargs.latest_weekly, theargs))

    if stage_name == CHIMERA_PREP:
        task_list.append(
            ChimeraProteinLigPrepTask(theargs.latest_weekly, theargs))

    if stage_name == 'extsubmission':
        extfac = ExternalDataSubmissionFactory(theargs.latest_weekly,
                                               theargs)
        task_list.extend(extfac.get_external_data_submissions())

    if stage_name == 'evaluation':
        # use util function call to get all evaluation tasks
        # append them to the task_list
        eval_task_factory = EvaluationTaskFactory(theargs.latest_weekly,
                                                  theargs)
        task_list.extend(eval_task_factory.get_evaluation_tasks())

    # Emptiness is tested by truthiness; the previous ``len(...) is 0``
    # compared object identity with an int literal, which only works by
    # way of CPython's small-integer cache and triggers a SyntaxWarning
    # on Python 3.8+.
    if not task_list:
        raise NotImplementedError(
            'uh oh no tasks for ' + stage_name + ' stage')

    return task_list
def test_get_uploadable_files(self):
    """Check EvaluationTask.get_uploadable_files as files are added.

    After each file or directory is created under the task directory the
    returned list is re-read, its length checked, and the new path
    asserted to be present.  Membership is verified with ``assertIn`` so
    a missing path is reported as a test failure rather than surfacing
    as a ``ValueError`` from ``list.index``.
    """
    temp_dir = tempfile.mkdtemp()
    try:
        params = D3RParameters()
        task = EvaluationTask(temp_dir, 'glide',
                              GlideTask(temp_dir, params), params)

        # try with no dir
        self.assertEqual(task.get_uploadable_files(), [])

        # try with empty dir
        task.create_dir()
        self.assertEqual(task.get_uploadable_files(), [])

        # try with final log
        final_log = os.path.join(task.get_dir(), EvaluationTask.FINAL_LOG)
        open(final_log, 'a').close()
        flist = task.get_uploadable_files()
        self.assertEqual(len(flist), 1)
        self.assertIn(final_log, flist)

        # try with RMSD.txt
        rmsd = os.path.join(task.get_dir(), EvaluationTask.RMSD_TXT)
        open(rmsd, 'a').close()
        flist = task.get_uploadable_files()
        self.assertEqual(len(flist), 2)
        self.assertIn(rmsd, flist)

        # try with empty pdbid dir: the directory itself adds nothing
        pdbid_dir = os.path.join(task.get_dir(), '8www')
        os.mkdir(pdbid_dir)
        flist = task.get_uploadable_files()
        self.assertEqual(len(flist), 2)
        self.assertIn(rmsd, flist)

        # try with empty score dir: still nothing new to upload
        score = os.path.join(pdbid_dir, 'score')
        os.mkdir(score)
        flist = task.get_uploadable_files()
        self.assertEqual(len(flist), 2)

        # try with each file placed under score/; every new file raises
        # the expected count by one.
        score_file_names = (
            'LMCSS-1fcz_1fcz_docked_complex.pdb',
            'SMCSS-1fcz_2lbd_docked_complex.pdb',
            'hiResHolo-1fcz_1fcy_docked_complex.pdb',
            'hiTanimoto-1fcz_1fcz_docked_complex.pdb',
            'crystal.pdb',
        )
        score_files = []
        for expected_count, file_name in enumerate(score_file_names,
                                                   start=3):
            score_file = os.path.join(score, file_name)
            open(score_file, 'a').close()
            flist = task.get_uploadable_files()
            self.assertEqual(len(flist), expected_count)
            self.assertIn(score_file, flist)
            score_files.append(score_file)

        # try with RMSD.pickle
        rmsdpickle = os.path.join(task.get_dir(),
                                  EvaluationTask.RMSD_PICKLE)
        open(rmsdpickle, 'a').close()
        flist = task.get_uploadable_files()
        self.assertEqual(len(flist), 8)
        self.assertIn(rmsdpickle, flist)

        # try with stderr/stdout files
        errfile = os.path.join(task.get_dir(), 'evaluate.py.stderr')
        open(errfile, 'a').close()
        outfile = os.path.join(task.get_dir(), 'evaluate.py.stdout')
        open(outfile, 'a').close()
        flist = task.get_uploadable_files()
        self.assertEqual(len(flist), 10)
        expected_files = list(score_files)
        expected_files.extend(
            [errfile, outfile, final_log, rmsd, rmsdpickle])
        for expected in expected_files:
            self.assertIn(expected, flist)
    finally:
        shutil.rmtree(temp_dir)
class TestCelppRunner(unittest.TestCase): """Tests celpprunner command line script """ param = D3RParameters() blast = BlastNFilterTask('/foo', param) BLAST_DIR_NAME = blast.get_dir_name() BLAST_NAME = blast.get_name() data = DataImportTask('/foo', param) IMPORT_DIR_NAME = data.get_dir_name() IMPORT_NAME = data.get_name() makedb = MakeBlastDBTask('/foo', param) MAKEDB_DIR_NAME = makedb.get_dir_name() MAKEDB_NAME = makedb.get_name() glide = GlideTask('/foo', param) GLIDE_DIR_NAME = glide.get_dir_name() prot = ProteinLigPrepTask('/foo', param) PROT_DIR_NAME = prot.get_dir_name() vina = AutoDockVinaTask('/foo', param) VINA_DIR_NAME = vina.get_dir_name() chall = ChallengeDataTask('/foo', param) CHALL_DIR_NAME = chall.get_dir_name() CHALL_NAME = chall.get_name() chimeraprep = ChimeraProteinLigPrepTask('/foo', param) CHIMERAPREP_DIR_NAME = chimeraprep.get_dir_name() def setUp(self): pass def test_get_lock(self): temp_dir = tempfile.mkdtemp() try: theargs = D3RParameters() theargs.latest_weekly = temp_dir # get the lock file which should work lock = celpprunner._get_lock(theargs, 'blast') expectedLockFile = os.path.join(temp_dir, 'celpprunner.blast.lockpid') self.assertTrue(os.path.isfile(expectedLockFile)) # try getting lock again which should also work lock = celpprunner._get_lock(theargs, 'blast') lock.release() self.assertFalse(os.path.isfile(expectedLockFile)) finally: shutil.rmtree(temp_dir) def test_get_lock_where_lockfile_exists_and_process_is_running(self): temp_dir = tempfile.mkdtemp() try: theargs = D3RParameters() theargs.latest_weekly = temp_dir lockfile = os.path.join(temp_dir, 'celpprunner.blast.lockpid') f = open(lockfile, 'w') pid = str(os.getppid()) f.write(pid) f.flush() f.close() # get the lock file which should work try: celpprunner._get_lock(theargs, 'blast') self.fail('Expected Exception') except Exception as e: self.assertEqual(str(e), 'celpprunner with pid ' + pid + ' is running') finally: shutil.rmtree(temp_dir) def test_parse_arguments(self): 
theargs = ['--stage', 'blast', 'foo'] result = celpprunner._parse_arguments('hi', theargs) self.assertEqual(result.stage, 'blast') self.assertEqual(result.celppdir, 'foo') self.assertEqual(result.email, None) self.assertEqual(result.summaryemail, None) self.assertEqual(result.loglevel, celpprunner.DEFAULT_LOG_LEVEL) self.assertEqual(result.blastnfilter, 'blastnfilter.py') self.assertEqual(result.proteinligprep, 'proteinligprep.py') self.assertEqual(result.evaluation, 'evaluate.py') self.assertEqual(result.makeblastdb, 'makeblastdb') self.assertEqual(result.genchallenge, 'genchallengedata.py') self.assertEqual(result.chimeraprep, 'chimera_proteinligprep.py') self.assertEqual(result.skipimportwait, False) self.assertEqual(result.importretry, 60) self.assertEqual(result.importsleep, 600) self.assertEqual(result.rdkitpython, '') self.assertEqual(result.summaryemail, None) self.assertEqual(result.postevaluation, 'post_evaluation.py') theargs = ['foo', '--stage', 'dock,glide', '--email', '[email protected],h@h', '--log', 'ERROR', '--blastnfilter', '/bin/blastnfilter.py', '--proteinligprep', '/bin/proteinligprep.py', '--postanalysis', '/bin/postanalysis.py', '--glide', '/bin/glide.py', '--vina', '/bin/vina.py', '--customweekdir', '--evaluation', '/bin/evaluation.py', '--makeblastdb', '/bin/makeblastdb', '--genchallenge', '/bin/gen.py', '--chimeraprep', '/bin/chimeraprep.py', '--skipimportwait', '--importretry', '10', '--importsleep', '30', '--rdkitpython', '/usr/bin', '--summaryemail', 'j@j,g@g', '--postevaluation', '/bin/yo.py'] result = celpprunner._parse_arguments('hi', theargs) self.assertEqual(result.stage, 'dock,glide') self.assertEqual(result.celppdir, 'foo') self.assertEqual(result.email, '[email protected],h@h') self.assertEqual(result.summaryemail, 'j@j,g@g') self.assertEqual(result.loglevel, 'ERROR') self.assertEqual(result.blastnfilter, '/bin/blastnfilter.py') self.assertEqual(result.proteinligprep, '/bin/proteinligprep.py') 
self.assertEquals(result.postanalysis, '/bin/postanalysis.py') self.assertEquals(result.glide, '/bin/glide.py') self.assertEquals(result.evaluation, '/bin/evaluation.py') self.assertEquals(result.customweekdir, True) self.assertEqual(result.makeblastdb, '/bin/makeblastdb') self.assertEqual(result.vina, '/bin/vina.py') self.assertEqual(result.genchallenge, '/bin/gen.py') self.assertEqual(result.chimeraprep, '/bin/chimeraprep.py') self.assertEqual(result.skipimportwait, True) self.assertEqual(result.importretry, 10) self.assertEqual(result.importsleep, 30) self.assertEqual(result.rdkitpython, '/usr/bin') self.assertEqual(result.postevaluation, '/bin/yo.py') def test_run_tasks_passing_none_and_empty_list(self): self.assertEquals(celpprunner.run_tasks(None), 3) task_list = [] self.assertEquals(celpprunner.run_tasks(task_list), 2) def test_run_one_successful_task(self): success_task = DummyTask(D3RParameters(), 'foo', None, True, None, None) success_task.set_name('dummy') task_list = [] task_list.append(success_task) self.assertEquals(celpprunner.run_tasks(task_list), 0) def test_run_one_fail_task_with_error_message(self): task = DummyTask(D3RParameters(), 'foo', 'someerror', True, None, None) task.set_name('dummy') task_list = [] task_list.append(task) self.assertEquals(celpprunner.run_tasks(task_list), 1) self.assertEquals(task.get_error(), 'someerror') def test_run_one_fail_task_with_exception_and_no_message(self): task = DummyTask(D3RParameters(), 'foo', None, True, None, Exception('hi')) task.set_name('dummy') task_list = [] task_list.append(task) self.assertEquals(celpprunner.run_tasks(task_list), 1) self.assertEquals(task.get_error(), 'Caught Exception running task: hi') def test_run_two_tasks_success(self): task_list = [] task = DummyTask(D3RParameters(), 'foo', None, True, None, None) task.set_name('dummy') task_list.append(task) task_list.append(task) self.assertEquals(celpprunner.run_tasks(task_list), 0) self.assertEquals(task._run_count, 2) def 
test_run_two_tasks_second_task_has_error(self): task_list = [] task = DummyTask(D3RParameters(), 'foo', None, True, None, None) task.set_name('dummy') task_list.append(task) task_two = DummyTask(D3RParameters(), 'foo', None, True, None, Exception('hi')) task_two.set_name('dummy') task_list.append(task_two) self.assertEquals(celpprunner.run_tasks(task_list), 1) self.assertEquals(task._run_count, 1) self.assertEquals(task_two._run_count, 1) self.assertEquals(task_two.get_error(), 'Caught Exception running task: hi') def test_run_two_tasks_first_task_has_error(self): task_list = [] task = DummyTask(D3RParameters(), 'foo', None, True, None, Exception('hi')) task.set_name('dummy') task_list.append(task) task_two = DummyTask(D3RParameters(), 'foo', None, True, None, None) task_two.set_name('dummy') task_list.append(task_two) self.assertEquals(celpprunner.run_tasks(task_list), 1) self.assertEquals(task.get_error(), 'Caught Exception running task: hi') self.assertEquals(task._run_count, 1) self.assertEquals(task_two._run_count, 1) def test_get_set_of_email_address_from_email_flags(self): params = D3RParameters() res = celpprunner._get_set_of_email_address_from_email_flags(params) self.assertEqual(res, None) # email set params.email = '*****@*****.**' res = celpprunner._get_set_of_email_address_from_email_flags(params) self.assertEqual(res, ['*****@*****.**']) # email set w dup params.email = '[email protected],[email protected],[email protected]' res = celpprunner._get_set_of_email_address_from_email_flags(params) self.assertEqual(res, ['*****@*****.**', '*****@*****.**']) # summary set only params.email = None params.summaryemail = '*****@*****.**' res = celpprunner._get_set_of_email_address_from_email_flags(params) self.assertEqual(res, ['*****@*****.**']) # summary w dups params.summaryemail = '[email protected],[email protected],[email protected]' res = celpprunner._get_set_of_email_address_from_email_flags(params) self.assertEqual(res, ['*****@*****.**', 
'*****@*****.**']) # both set params.email = '[email protected],[email protected]' res = celpprunner._get_set_of_email_address_from_email_flags(params) self.assertEqual(res, ['*****@*****.**', '*****@*****.**', '*****@*****.**']) def test_get_task_list_for_stage_with_invalid_stage_name(self): try: celpprunner.get_task_list_for_stage(D3RParameters(), None) self.fail('Expected exception') except NotImplementedError as e: self.assertEquals(e.message, 'stage_name is None') try: celpprunner.get_task_list_for_stage(D3RParameters(), '') self.fail('Expected exception') except NotImplementedError as e: self.assertEquals(e.message, 'uh oh no tasks for stage') try: celpprunner.get_task_list_for_stage(D3RParameters(), 'foo') self.fail('Expected exception') except NotImplementedError as e: self.assertEquals(e.message, 'uh oh no tasks for foo stage') def test_get_task_list_for_stage_with_valid_stages(self): params = D3RParameters() params.latest_weekly = 'foo' task_list = celpprunner.get_task_list_for_stage(params, 'blast') self.assertEquals(len(task_list), 1) self.assertEquals(task_list[0].get_dir(), os.path.join('foo', TestCelppRunner.BLAST_DIR_NAME)) task_list = celpprunner.get_task_list_for_stage(params, 'proteinligprep') self.assertEquals(len(task_list), 1) self.assertEquals(task_list[0].get_dir(), os.path.join('foo', TestCelppRunner.PROT_DIR_NAME)) task_list = celpprunner.get_task_list_for_stage(params, 'import') self.assertEquals(len(task_list), 1) self.assertEquals(task_list[0].get_dir(), os.path.join('foo', TestCelppRunner.IMPORT_DIR_NAME)) task_list = celpprunner.get_task_list_for_stage(params, 'glide') self.assertEquals(len(task_list), 1) self.assertEquals(task_list[0].get_dir(), os.path.join('foo', TestCelppRunner.GLIDE_DIR_NAME)) task_list = celpprunner.get_task_list_for_stage(params, 'vina') self.assertEquals(len(task_list), 1) self.assertEquals(task_list[0].get_dir(), os.path.join('foo', TestCelppRunner.VINA_DIR_NAME)) task_list = 
celpprunner.get_task_list_for_stage(params, 'challengedata') self.assertEquals(len(task_list), 1) self.assertEquals(task_list[0].get_dir(), os.path.join('foo', TestCelppRunner.CHALL_DIR_NAME)) task_list = celpprunner.get_task_list_for_stage(params, 'chimeraprep') self.assertEquals(len(task_list), 1) self.assertEquals(task_list[0].get_dir(), os.path.join('foo', TestCelppRunner.CHIMERAPREP_DIR_NAME)) def test_get_task_list_for_stage_createchallenge(self): temp_dir = tempfile.mkdtemp() try: params = D3RParameters() params.latest_weekly = temp_dir task_list = celpprunner.get_task_list_for_stage( params, celpprunner.CREATE_CHALLENGE) self.assertEqual(len(task_list), 4) self.assertEqual(task_list[0].get_name(), TestCelppRunner.MAKEDB_NAME) self.assertEqual(task_list[1].get_name(), TestCelppRunner.IMPORT_NAME) self.assertEqual(task_list[2].get_name(), TestCelppRunner.BLAST_NAME) self.assertEqual(task_list[3].get_name(), TestCelppRunner.CHALL_NAME) finally: shutil.rmtree(temp_dir) def test_get_task_list_for_stage_for_scoring_stage_with_nonefound(self): temp_dir = tempfile.mkdtemp() try: params = D3RParameters() params.latest_weekly = temp_dir try: celpprunner.get_task_list_for_stage(params, 'evaluation') except NotImplementedError as e: self.assertEqual(e.message, 'uh oh no tasks for evaluation stage') finally: shutil.rmtree(temp_dir) def test_get_task_list_for_stage_for_scoring_stage_with_onefound(self): temp_dir = tempfile.mkdtemp() try: params = D3RParameters() blasttask = BlastNFilterTask(temp_dir, params) blasttask.create_dir() open(os.path.join(blasttask.get_dir(), D3RTask.COMPLETE_FILE), 'a').close() params.latest_weekly = temp_dir glidedir = os.path.join(temp_dir, EvaluationTaskFactory.DOCKSTAGE_PREFIX + 'glide') os.mkdir(glidedir) open(os.path.join(glidedir, D3RTask.COMPLETE_FILE), 'a').close() task_list = celpprunner.get_task_list_for_stage(params, 'evaluation') self.assertEqual(len(task_list), 1) self.assertEqual(task_list[0].get_name(), 'glide.evaluation') 
finally: shutil.rmtree(temp_dir) def test_get_task_list_for_stage_for_scoring_stage_with_twofound(self): temp_dir = tempfile.mkdtemp() try: params = D3RParameters() blasttask = BlastNFilterTask(temp_dir, params) blasttask.create_dir() open(os.path.join(blasttask.get_dir(), D3RTask.COMPLETE_FILE), 'a').close() params.latest_weekly = temp_dir glidedir = os.path.join(temp_dir, EvaluationTaskFactory.DOCKSTAGE_PREFIX + 'glide') os.mkdir(glidedir) open(os.path.join(glidedir, D3RTask.COMPLETE_FILE), 'a').close() freddir = os.path.join(temp_dir, EvaluationTaskFactory.DOCKSTAGE_PREFIX + 'fred') os.mkdir(freddir) open(os.path.join(freddir, D3RTask.COMPLETE_FILE), 'a').close() task_list = celpprunner.get_task_list_for_stage(params, 'evaluation') self.assertEqual(len(task_list), 2) finally: shutil.rmtree(temp_dir) def test_run_stages_no_weekly_datasetfound(self): temp_dir = tempfile.mkdtemp() try: theargs = D3RParameters() theargs.celppdir = temp_dir self.assertEqual(celpprunner.run_stages(theargs), 0) finally: shutil.rmtree(temp_dir) def test_run_stages_invalid_stage(self): temp_dir = tempfile.mkdtemp() try: theargs = D3RParameters() theargs.celppdir = temp_dir theargs.stage = 'foo' os.mkdir(os.path.join(temp_dir, '2015')) os.mkdir(os.path.join(temp_dir, '2015', 'dataset.week.1')) try: celpprunner.run_stages(theargs) except NotImplementedError as e: self.assertEquals(e.message, 'uh oh no tasks for foo stage') finally: shutil.rmtree(temp_dir) def test_run_stages_blast_stage_data_import_missing(self): temp_dir = tempfile.mkdtemp() try: theargs = D3RParameters() theargs.celppdir = temp_dir theargs.stage = 'blast' os.mkdir(os.path.join(temp_dir, '2015')) os.mkdir(os.path.join(temp_dir, '2015', 'dataset.week.1')) makedb_dir = os.path.join(temp_dir, '2015', 'dataset.week.1', TestCelppRunner.MAKEDB_DIR_NAME) os.makedirs(makedb_dir) open(os.path.join(makedb_dir, 'complete'), 'a').close() self.assertEquals(celpprunner.run_stages(theargs), 1) finally: shutil.rmtree(temp_dir) def 
test_run_stages_blast(self): temp_dir = tempfile.mkdtemp() try: theargs = D3RParameters() theargs.celppdir = os.path.join(temp_dir) theargs.stage = 'blast' theargs.pdbdb = '/pdbdb' makedb_dir = os.path.join(temp_dir, '2015', 'dataset.week.1', TestCelppRunner.MAKEDB_DIR_NAME) os.makedirs(makedb_dir) open(os.path.join(makedb_dir, 'complete'), 'a').close() d_import_dir = os.path.join(temp_dir, '2015', 'dataset.week.1', TestCelppRunner.IMPORT_DIR_NAME) os.makedirs(d_import_dir) open(os.path.join(d_import_dir, 'complete'), 'a').close() theargs.blastnfilter = 'echo' theargs.postanalysis = 'true' self.assertEqual(celpprunner.run_stages(theargs), 0) finally: shutil.rmtree(temp_dir) def test_run_stages_blast_has_error(self): temp_dir = tempfile.mkdtemp() try: theargs = D3RParameters() theargs.celppdir = os.path.join(temp_dir) theargs.stage = 'blast' os.mkdir(os.path.join(temp_dir, TestCelppRunner.MAKEDB_DIR_NAME)) open(os.path.join(temp_dir, TestCelppRunner.MAKEDB_DIR_NAME, 'error'), 'a').close() os.mkdir(os.path.join(temp_dir, '2015')) os.mkdir(os.path.join(temp_dir, '2015', 'dataset.week.1')) self.assertEqual(celpprunner.run_stages(theargs), 1) finally: shutil.rmtree(temp_dir) def test_run_stages_challenge_and_proteinligprep_no_error(self): temp_dir = tempfile.mkdtemp() try: theargs = D3RParameters() theargs.pdbdb = '/pdbdb' theargs.celppdir = os.path.join(temp_dir) theargs.stage = 'challengedata,proteinligprep' blastdb_dir = os.path.join(temp_dir, '2015', 'dataset.week.1', TestCelppRunner.BLAST_DIR_NAME) os.makedirs(blastdb_dir) open(os.path.join(blastdb_dir, 'complete'), 'a').close() theargs.proteinligprep = 'echo' theargs.genchallenge = 'echo' self.assertEqual(celpprunner.run_stages(theargs), 0) finally: shutil.rmtree(temp_dir) def test_run_stages_blast_and_proteinligprep_blast_has_error(self): temp_dir = tempfile.mkdtemp() try: theargs = D3RParameters() theargs.celppdir = os.path.join(temp_dir) theargs.stage = 'blast,proteinligprep' os.mkdir(os.path.join(temp_dir, 
TestCelppRunner.MAKEDB_DIR_NAME)) open(os.path.join(temp_dir, TestCelppRunner.MAKEDB_DIR_NAME, 'complete'), 'a').close() d_import_dir = os.path.join(temp_dir, '2015', 'dataset.week.1', TestCelppRunner.IMPORT_DIR_NAME) os.makedirs(d_import_dir) open(os.path.join(d_import_dir, D3RTask.ERROR_FILE), 'a').close() theargs.blastnfilter = 'echo' theargs.proteinligprep = 'echo' self.assertEqual(celpprunner.run_stages(theargs), 1) finally: shutil.rmtree(temp_dir) def test_run_stages_makedb_blast_chall_proteinligprep_glide_no_error(self): temp_dir = tempfile.mkdtemp() try: theargs = D3RParameters() theargs.pdbdb = '/pdbdb' theargs.celppdir = os.path.join(temp_dir) theargs.stage = 'makedb,blast,challengedata,proteinligprep,glide' d_import_dir = os.path.join(temp_dir, '2015', 'dataset.week.1', TestCelppRunner.IMPORT_DIR_NAME) os.makedirs(d_import_dir) open(os.path.join(d_import_dir, D3RTask.COMPLETE_FILE), 'a').close() fakegz = os.path.join(temp_dir, 'fake.gz') f = gzip.open(fakegz, 'wb') f.write('hello\n') f.flush() f.close() theargs.pdbsequrl = 'file://'+fakegz theargs.makeblastdb = 'echo' theargs.blastnfilter = 'echo' theargs.postanalysis = 'true' theargs.proteinligprep = 'echo' theargs.glide = 'echo' theargs.genchallenge = 'echo' self.assertEqual(celpprunner.run_stages(theargs), 0) finally: shutil.rmtree(temp_dir) def test_run_stages_makedb_through_glide(self): """This should test the following stages will run makedb,import,blast,challengedata,proteinligprep,glide,vina """ temp_dir = tempfile.mkdtemp() try: theargs = D3RParameters() theargs.pdbdb = '/pdbdb' theargs.celppdir = os.path.join(temp_dir) theargs.stage = 'makedb,import,blast,challengedata,proteinligprep,' \ 'chimeraprep,glide,vina' d_import_dir = os.path.join(temp_dir, '2015', 'dataset.week.1', TestCelppRunner.IMPORT_DIR_NAME) os.makedirs(d_import_dir) open(os.path.join(d_import_dir, D3RTask.COMPLETE_FILE), 'a').close() fakegz = os.path.join(temp_dir, 'fake.gz') f = gzip.open(fakegz, 'wb') f.write('hello\n') 
f.flush() f.close() theargs.pdbsequrl = 'file://' + fakegz theargs.pdbfileurl = 'file://' + fakegz theargs.compinchi = 'file://' + fakegz theargs.version = '1.0.0' theargs.makeblastdb = 'echo' theargs.blastnfilter = 'echo' theargs.postanalysis = 'true' theargs.proteinligprep = 'echo' theargs.glide = 'echo' theargs.vina = 'echo' theargs.genchallenge = 'echo' theargs.chimeraprep = 'echo' self.assertEqual(celpprunner.run_stages(theargs), 0) finally: shutil.rmtree(temp_dir) def test_get_task_list_for_stage_extsubmission(self): temp_dir = tempfile.mkdtemp() try: theargs = D3RParameters() theargs.pdbdb = '/pdbdb' theargs.latest_weekly = temp_dir theargs.stage = 'extsubmission' try: celpprunner.get_task_list_for_stage(theargs, 'extsubmission') self.fail('expected NotImplementedError') except NotImplementedError: pass finally: shutil.rmtree(temp_dir) def test_get_task_list_for_stage_postevaluation(self): temp_dir = tempfile.mkdtemp() try: theargs = D3RParameters() theargs.latest_weekly = temp_dir theargs.stage = celpprunner.POST_EVAL t_list = celpprunner.get_task_list_for_stage(theargs, theargs.stage) self.assertEqual(len(t_list), 1) self.assertEqual(t_list[0].get_name(), 'postevaluation') # try this time with email set theargs.email = '[email protected],[email protected]' t_list = celpprunner.get_task_list_for_stage(theargs, theargs.stage) self.assertEqual(len(t_list), 1) self.assertEqual(t_list[0].get_name(), 'postevaluation') # try this time with both set theargs.email = '[email protected],[email protected]' theargs.summaryemail = '[email protected],[email protected]' t_list = celpprunner.get_task_list_for_stage(theargs, theargs.stage) self.assertEqual(len(t_list), 1) self.assertEqual(t_list[0].get_name(), 'postevaluation') finally: shutil.rmtree(temp_dir) def test_run_stages_createweekdir_set(self): temp_dir = tempfile.mkdtemp() try: theargs = D3RParameters() theargs.celppdir = os.path.join(temp_dir) theargs.createweekdir = True theargs.stage = '' d = date.today() 
celp_week = util.get_celpp_week_of_year_from_date(d) try: self.assertEquals(celpprunner.run_stages(theargs), 0) self.fail('Expected NotImplementedError') except NotImplementedError: pass expected_dir = os.path.join(temp_dir, str(celp_week[1]), 'dataset.week.' + str(celp_week[0])) self.assertEquals(os.path.isdir(expected_dir), True) finally: shutil.rmtree(temp_dir) def test_run_stages_customweekdir_set(self): temp_dir = tempfile.mkdtemp() try: theargs = D3RParameters() theargs.celppdir = temp_dir theargs.customweekdir = True theargs.createweekdir = True theargs.stage = '' try: self.assertEquals(celpprunner.run_stages(theargs), 0) self.fail('Expected NotImplementedError') except NotImplementedError: pass finally: shutil.rmtree(temp_dir) def test_main_success(self): temp_dir = tempfile.mkdtemp() try: theargs = ['celpprunner.py', '--stage', 'blast', '--pdbdb', '/pdbdb', '--blastnfilter', 'echo', '--postanalysis', 'true', temp_dir] makedb_dir = os.path.join(temp_dir, '2015', 'dataset.week.1', TestCelppRunner.MAKEDB_DIR_NAME) os.makedirs(makedb_dir) open(os.path.join(makedb_dir, 'complete'), 'a').close() d_import_dir = os.path.join(temp_dir, '2015', 'dataset.week.1', TestCelppRunner.IMPORT_DIR_NAME) os.makedirs(d_import_dir) open(os.path.join(d_import_dir, 'complete'), 'a').close() self.assertEqual(celpprunner.main(theargs), 0) finally: shutil.rmtree(temp_dir) def test_main_where_run_stages_raises_error(self): temp_dir = tempfile.mkdtemp() try: theargs = ['celpprunner.py', '--stage', 'foo', os.path.join(temp_dir, 'notexistdir')] self.assertEqual(celpprunner.main(theargs), 2) finally: shutil.rmtree(temp_dir) def tearDown(self): pass