def test_run_success_with_ftp_upload(self):
    """Runs ChallengeDataTask with an FtpFileTransfer whose connection
    is a stand-in object and verifies the task finishes with no error
    and a complete file
    """
    work_dir = tempfile.mkdtemp()
    try:
        params = D3RParameters()
        params.genchallenge = self.create_gen_challenge_script(work_dir)
        params.pdbdb = '/foo'
        params.version = '1'

        # drop a complete file into the blastnfilter task directory
        blast_task = BlastNFilterTask(work_dir, params)
        blast_task.create_dir()
        complete_marker = os.path.join(blast_task.get_dir(),
                                       D3RTask.COMPLETE_FILE)
        open(complete_marker, 'a').close()

        chall = ChallengeDataTask(work_dir, params)

        # stand-in connection whose put() returns 3 and then 5
        fake_conn = D3RParameters()
        fake_conn.put = Mock(side_effect=[3, 5])
        ftp = FtpFileTransfer(None)
        ftp.set_remote_challenge_dir('/challenge')
        ftp.set_connection(fake_conn)
        chall.set_file_transfer(ftp)

        # write the three tsv files into the dataimport directory
        dimport = DataImportTask(work_dir, params)
        dimport.create_dir()
        fixtures = ((dimport.get_crystalph_tsv, 'crystal'),
                    (dimport.get_nonpolymer_tsv, 'nonpoly'),
                    (dimport.get_sequence_tsv, 'seq'))
        for path_getter, content in fixtures:
            with open(path_getter(), 'w') as handle:
                handle.write(content)

        chall.run()
        self.assertEqual(chall.get_error(), None)

        # verify test files get created
        self.assertFalse(os.path.isfile(
            os.path.join(chall.get_dir(), D3RTask.ERROR_FILE)))
        self.assertTrue(os.path.isfile(
            os.path.join(chall.get_dir(), D3RTask.COMPLETE_FILE)))
        ftp.disconnect()
    finally:
        shutil.rmtree(work_dir)
def test_get_uploadable_files(self):
    """Verifies get_uploadable_files() returns an empty list until
    files exist and then grows by one entry as each file is created
    """
    work_dir = tempfile.mkdtemp()
    try:
        params = D3RParameters()
        task = DataImportTask(work_dir, params)

        # task directory has not been created yet
        self.assertEqual(task.get_uploadable_files(), [])

        # test empty dir
        task.create_dir()
        self.assertEqual(task.get_uploadable_files(), [])

        # create compinchi, crystal, nonpolymer, and sequence files one
        # at a time and after each creation check every file created
        # so far is in the list
        path_getters = (task.get_components_inchi_file,
                        task.get_crystalph_tsv,
                        task.get_nonpolymer_tsv,
                        task.get_sequence_tsv)
        for count, newest in enumerate(path_getters, 1):
            open(newest(), 'a').close()
            uploadable = task.get_uploadable_files()
            self.assertEqual(len(uploadable), count)
            for created in path_getters[:count]:
                # index() raises ValueError if the path is missing
                uploadable.index(created())
    finally:
        shutil.rmtree(work_dir)
def test_append_standard_to_files(self):
    """Verifies append_standard_to_files() creates the three tsv files
    and can be invoked again when the nonpolymer tsv path is a
    directory
    """
    work_dir = tempfile.mkdtemp()
    try:
        task = DataImportTask(work_dir, D3RParameters())
        task.create_dir()
        task.append_standard_to_files()
        for tsv_path in (task.get_nonpolymer_tsv(),
                         task.get_sequence_tsv(),
                         task.get_crystalph_tsv()):
            self.assertTrue(os.path.isfile(tsv_path))

        # now do it again, but this time make the append fail
        # cause the nonpolymer_tsv is a directory
        blocker = task.get_nonpolymer_tsv()
        os.unlink(blocker)
        os.makedirs(blocker)
        task.append_standard_to_files()
    finally:
        shutil.rmtree(work_dir)
def test_copy_over_tsv_files(self):
    """Writes the three dataimport tsv files and verifies
    ChallengeDataTask._copy_over_tsv_files() puts copies with matching
    first lines into the challenge directory
    """
    work_dir = tempfile.mkdtemp()
    try:
        params = D3RParameters()
        dimport = DataImportTask(work_dir, params)
        dimport.create_dir()
        fixtures = ((dimport.get_crystalph_tsv, 'crystal'),
                    (dimport.get_nonpolymer_tsv, 'nonpoly'),
                    (dimport.get_sequence_tsv, 'seq'))
        for path_getter, content in fixtures:
            with open(path_getter(), 'w') as handle:
                handle.write(content)

        task = ChallengeDataTask(work_dir, params)
        task.create_dir()
        challenge_dir = task._create_challenge_dir()
        self.assertTrue(os.path.isdir(challenge_dir))

        task._copy_over_tsv_files(challenge_dir)

        # each copied file must exist and start with what was written
        expected = ((DataImportTask.CRYSTALPH_TSV, 'crystal'),
                    (DataImportTask.NONPOLYMER_TSV, 'nonpoly'),
                    (DataImportTask.SEQUENCE_TSV, 'seq'))
        for tsv_name, content in expected:
            copied = os.path.join(challenge_dir, tsv_name)
            self.assertTrue(os.path.isfile(copied))
            with open(copied) as handle:
                self.assertEqual(handle.readline(), content)
    finally:
        shutil.rmtree(work_dir)
def _copy_over_tsv_files(self, challenge_dir):
    """Copies over tsv files from `DataImportTask`

    The crystalph, nonpolymer, and sequence tsv files are copied, in
    that order, into `challenge_dir` under the file names defined by
    the `DataImportTask` constants.  A source that is not a file is
    skipped and a warning is logged.

    :param challenge_dir: directory the tsv files are copied into
    """
    dataimport = DataImportTask(self.get_path(), self.get_args())
    tsv_files = (
        (dataimport.get_crystalph_tsv(), DataImportTask.CRYSTALPH_TSV),
        (dataimport.get_nonpolymer_tsv(), DataImportTask.NONPOLYMER_TSV),
        (dataimport.get_sequence_tsv(), DataImportTask.SEQUENCE_TSV),
    )
    for src, tsv_name in tsv_files:
        dest = os.path.join(challenge_dir, tsv_name)
        if not os.path.isfile(src):
            logger.warning(src + ' file does not exist')
            continue
        # leading space before 'to' keeps the source path and the word
        # from running together in the log message
        logger.debug('Copying over ' + src + ' to ' + dest)
        shutil.copyfile(src, dest)
def test_run_all_success(self):
    """Runs DataImportTask against file:// urls pointing at empty
    files with a stand-in ftp connection and verifies no error is set,
    each tsv file ends up with one line, and the participant list was
    requested from the remote directory
    """
    temp_dir = tempfile.mkdtemp()
    try:
        # FtpFileTransfer backed by an object whose get() is a Mock
        fakeftp = FtpFileTransfer(None)
        mftp = D3RParameters()
        fakeftp.set_connection(mftp)
        fakeftp.set_remote_dir('/foo2')
        mftp.get = Mock()

        params = D3RParameters()
        params.pdbfileurl = 'file://' + temp_dir
        params.compinchi = 'file://' + temp_dir

        # drop a complete file into the makeblastdb task directory
        make_blast = MakeBlastDBTask(temp_dir, params)
        make_blast.create_dir()
        open(os.path.join(make_blast.get_dir(),
                          D3RTask.COMPLETE_FILE), 'a').close()

        task = DataImportTask(temp_dir, params)
        task.set_file_transfer(fakeftp)
        task._retrysleep = 0

        # empty source files for the file:// urls to resolve to
        for fname in (task.NONPOLYMER_TSV, task.SEQUENCE_TSV,
                      task.OLDSEQUENCE_TSV, task.CRYSTALPH_TSV,
                      task.COMPINCHI_ICH):
            open(os.path.join(temp_dir, fname), 'a').close()

        task.run()
        # assertEqual rather than the deprecated assertEquals alias,
        # which no longer exists as of Python 3.12
        self.assertEqual(task.get_error(), None)

        # check line count is 1 now which indicates
        # standard was added
        for tsv_path in (task.get_nonpolymer_tsv(),
                         task.get_sequence_tsv(),
                         task.get_oldsequence_tsv(),
                         task.get_crystalph_tsv()):
            self.assertEqual(util.get_file_line_count(tsv_path), 1)

        mftp.get.assert_called_with(
            '/foo2/' + DataImportTask.PARTICIPANT_LIST_CSV,
            local=task.get_participant_list_csv())
    finally:
        shutil.rmtree(temp_dir)
def test_tar_challenge_dir(self):
    """Builds a challenge directory, tars it with _tar_challenge_dir()
    and verifies the contents of the extracted tarball
    """
    work_dir = tempfile.mkdtemp()
    try:
        params = D3RParameters()
        dimport = DataImportTask(work_dir, params)
        dimport.create_dir()
        for path_getter in (dimport.get_sequence_tsv,
                            dimport.get_nonpolymer_tsv,
                            dimport.get_crystalph_tsv):
            open(path_getter(), 'a').close()

        task = ChallengeDataTask(work_dir, params)
        task.create_dir()
        chall_dir = task._create_challenge_dir()
        open(os.path.join(chall_dir, 'final.log'), 'a').close()
        task._create_readme(chall_dir)
        task._copy_over_tsv_files(chall_dir)

        # make a fake candidate
        candidate_files = self.make_fake_candidate_dir(chall_dir, '5hib',
                                                       '2eb2', 'CSX')
        self.assertTrue(os.path.isdir(chall_dir))

        dir_name = task.get_celpp_challenge_data_dir_name()
        tarball = task._tar_challenge_dir(dir_name)
        self.assertTrue(os.path.isfile(tarball))

        # unpack the tarball into a 'foo' directory under work_dir
        extract_dir = os.path.join(work_dir, 'foo')
        with tarfile.open(tarball, 'r:*') as archive:
            archive.extractall(path=extract_dir)

        unpacked = os.path.join(extract_dir, dir_name)
        self.assertTrue(os.path.isfile(
            os.path.join(unpacked, ChallengeDataTask.README_TXT_FILE)))

        # NOTE(review): final.log is looked for directly under the
        # extraction directory and not under the unpacked challenge
        # directory -- confirm this is the intended location
        self.assertFalse(os.path.isfile(
            os.path.join(extract_dir, 'final.log')))

        for fname in candidate_files:
            chk = os.path.join(unpacked, fname)
            self.assertTrue(os.path.isfile(chk), chk)
    finally:
        shutil.rmtree(work_dir)
def test_run_all_nonpolymer_fail(self):
    """Runs DataImportTask with file:// urls pointing at a directory
    holding no source files and verifies the error reports the failed
    download of the nonpolymer tsv file
    """
    temp_dir = tempfile.mkdtemp()
    try:
        params = D3RParameters()
        params.pdbfileurl = 'file://' + temp_dir
        params.compinchi = 'file://' + temp_dir

        # drop a complete file into the makeblastdb task directory
        make_blast = MakeBlastDBTask(temp_dir, params)
        make_blast.create_dir()
        open(os.path.join(make_blast.get_dir(),
                          D3RTask.COMPLETE_FILE), 'a').close()

        task = DataImportTask(temp_dir, params)
        task._retrysleep = 0
        task.run()

        # assertEqual rather than the deprecated assertEquals alias,
        # which no longer exists as of Python 3.12
        self.assertEqual(
            task.get_error(),
            'Unable to download file ' + 'from ' + params.pdbfileurl +
            ' to ' + task.get_nonpolymer_tsv())
    finally:
        shutil.rmtree(temp_dir)
def test_run_all_success_except_participant_download_fails(self):
    """Runs DataImportTask with no file transfer object set and
    verifies no error is set, each tsv file ends up with one line, and
    the email log starts with a warning about a failed download
    """
    temp_dir = tempfile.mkdtemp()
    try:
        params = D3RParameters()
        params.pdbfileurl = 'file://' + temp_dir
        params.compinchi = 'file://' + temp_dir

        # drop a complete file into the makeblastdb task directory
        make_blast = MakeBlastDBTask(temp_dir, params)
        make_blast.create_dir()
        open(os.path.join(make_blast.get_dir(),
                          D3RTask.COMPLETE_FILE), 'a').close()

        task = DataImportTask(temp_dir, params)
        task._retrysleep = 0

        # empty source files for the file:// urls to resolve to
        for fname in (task.NONPOLYMER_TSV, task.SEQUENCE_TSV,
                      task.OLDSEQUENCE_TSV, task.CRYSTALPH_TSV,
                      task.COMPINCHI_ICH):
            open(os.path.join(temp_dir, fname), 'a').close()

        task.run()
        # assertEqual rather than the deprecated assertEquals alias,
        # which no longer exists as of Python 3.12
        self.assertEqual(task.get_error(), None)

        # check line count is 1 now which indicates
        # standard was added
        for tsv_path in (task.get_nonpolymer_tsv(),
                         task.get_sequence_tsv(),
                         task.get_oldsequence_tsv(),
                         task.get_crystalph_tsv()):
            self.assertEqual(util.get_file_line_count(tsv_path), 1)

        self.assertTrue(task.get_email_log().startswith(
            '\nWARNING: Unable to download'))
    finally:
        shutil.rmtree(temp_dir)
def run(self):
    """Runs the blastnfilter script followed by the postanalysis script

    The parent class run() is invoked first and this method returns
    right away if the _can_run flag is False.  Otherwise the
    blastnfilter command line is built from paths provided by
    `DataImportTask` and `MakeBlastDBTask` along with values from the
    task arguments.  If the sequence tsv file is not on the filesystem
    the old sequence tsv file is used and a note is added to the email
    log.  After both external commands have been invoked any hit
    statistics parsed from the blastnfilter output are appended to the
    email log and end() is called.
    """
    super(BlastNFilterTask, self).run()

    if self._can_run is False:
        logger.debug(self.get_dir_name() +
                     ' cannot run cause _can_run flag '
                     'is False')
        return

    importer = DataImportTask(self._path, self._args)
    blastdb = MakeBlastDBTask(self._path, self._args)

    try:
        log_level = self.get_args().loglevel
    except AttributeError:
        logger.debug('No log level set in arguments using WARNING')
        log_level = 'WARNING'

    # verify sequence.tsv file exists on filesystem.
    # if not fall back to oldsequence.tsv file
    seq_tsv = importer.get_sequence_tsv()
    if not os.path.isfile(seq_tsv):
        logger.warning(seq_tsv + ' file not found. falling '
                       'back to old file')
        self.append_to_email_log('\n ' + seq_tsv +
                                 ' file not found ' +
                                 'falling back to ' +
                                 importer.get_oldsequence_tsv() + '\n')
        seq_tsv = importer.get_oldsequence_tsv()

    blast_cmd = ''.join((
        self.get_args().blastnfilter,
        ' --nonpolymertsv ', importer.get_nonpolymer_tsv(),
        ' --sequencetsv ', seq_tsv,
        ' --pdbblastdb ', blastdb.get_dir(),
        ' --compinchi ', importer.get_components_inchi_file(),
        ' --crystalpH ', importer.get_crystalph_tsv(),
        ' --pdbdb ', self.get_args().pdbdb,
        ' --log ', log_level,
        ' --outdir ', self.get_dir(),
    ))
    self.run_external_command(
        os.path.basename(self.get_args().blastnfilter),
        blast_cmd, False)

    self.set_status(D3RTask.COMPLETE_STATUS)

    post_cmd = ''.join((
        self.get_args().postanalysis,
        ' --compinchi ', importer.get_components_inchi_file(),
        ' ', self.get_dir(),
    ))
    self.run_external_command(
        os.path.basename(self.get_args().postanalysis),
        post_cmd, False)

    try:
        # examine output to get candidate hit count DR-12
        stats = self._parse_blastnfilter_output_for_hit_stats()
        if stats is not None:
            self.append_to_email_log(stats)
    except Exception:
        logger.exception("Error caught exception")

    # assess the result
    self.end()
def test_get_nonpolymer_tsv(self):
    """Verifies get_nonpolymer_tsv() returns the nonpolymer tsv file
    name under the task directory
    """
    task = DataImportTask('/foo', D3RParameters())
    actual = task.get_nonpolymer_tsv()
    expected = ('/foo/' + task.get_dir_name() +
                '/new_release_structure_nonpolymer.tsv')
    self.assertEqual(actual, expected)
def test_run_succeeds_no_ftp(self):
    """Runs ChallengeDataTask with no file transfer object set and
    verifies the task output files, the challenge directory contents,
    and the contents of the extracted tarball
    """
    work_dir = tempfile.mkdtemp()
    try:
        params = D3RParameters()
        params.genchallenge = self.create_gen_challenge_script(work_dir)
        params.pdbdb = '/foo'
        params.version = '1'

        # drop a complete file into the blastnfilter task directory
        blast_task = BlastNFilterTask(work_dir, params)
        blast_task.create_dir()
        complete_marker = os.path.join(blast_task.get_dir(),
                                       D3RTask.COMPLETE_FILE)
        open(complete_marker, 'a').close()

        chall = ChallengeDataTask(work_dir, params)

        # write the three tsv files into the dataimport directory
        dimport = DataImportTask(work_dir, params)
        dimport.create_dir()
        fixtures = ((dimport.get_crystalph_tsv, 'crystal'),
                    (dimport.get_nonpolymer_tsv, 'nonpoly'),
                    (dimport.get_sequence_tsv, 'seq'))
        for path_getter, content in fixtures:
            with open(path_getter(), 'w') as handle:
                handle.write(content)

        chall.run()
        self.assertEqual(chall.get_error(), None)

        # verify test files get created
        self.assertFalse(os.path.isfile(
            os.path.join(chall.get_dir(), D3RTask.ERROR_FILE)))
        for produced in (D3RTask.COMPLETE_FILE,
                         'genchallenge.py.stderr',
                         'genchallenge.py.stdout'):
            self.assertTrue(os.path.isfile(
                os.path.join(chall.get_dir(), produced)))

        # verify challenge directory is created and
        # filled with valid files
        chall_dir = os.path.join(chall.get_dir(),
                                 chall.get_celpp_challenge_data_dir_name())
        self.assertTrue(os.path.isdir(chall_dir))
        for top_level in (ChallengeDataTask.README_TXT_FILE,
                          DataImportTask.CRYSTALPH_TSV,
                          DataImportTask.SEQUENCE_TSV,
                          DataImportTask.NONPOLYMER_TSV):
            self.assertTrue(os.path.isfile(
                os.path.join(chall_dir, top_level)))

        # each candidate directory holds a .txt and a .fasta file
        for candidate in ('5hib', '5hic'):
            candidate_dir = os.path.join(chall_dir, candidate)
            for suffix in ('.txt', '.fasta'):
                self.assertTrue(os.path.isfile(
                    os.path.join(candidate_dir, candidate + suffix)))

        # verify tarball is created
        tarball = chall.get_celpp_challenge_data_tar_file()
        self.assertTrue(os.path.isfile(tarball))

        extract_dir = os.path.join(work_dir, 'foo')
        os.mkdir(extract_dir)
        with tarfile.open(tarball, 'r:*') as archive:
            archive.extractall(path=extract_dir)

        dir_name = chall.get_celpp_challenge_data_dir_name()
        self.assertTrue(os.path.isfile(
            os.path.join(extract_dir, dir_name,
                         ChallengeDataTask.README_TXT_FILE)))
        self.assertFalse(os.path.isfile(
            os.path.join(extract_dir, dir_name, 'final.log')))
        self.assertFalse(os.path.isdir(
            os.path.join(extract_dir, dir_name, 'error_container')))
    finally:
        shutil.rmtree(work_dir)