def test_get_set_of_pbdid_from_pdb_seqres_txt_with_400k_file(self):
    """Parse a large sequence file and expect one id per record written.

    Up to 400000 header/sequence pairs are written, each header carrying
    a distinct 4-character id drawn from permutations of lowercase
    letters and the digits 1-9.  The size of the returned set must match
    the number of records actually written.
    """
    temp_dir = tempfile.mkdtemp()
    try:
        params = D3RParameters()
        task = MakeBlastDBTask(temp_dir, params)
        task.create_dir()
        perms = itertools.permutations(
            string.ascii_lowercase + '123456789', 4)
        limit = 400000
        counter = 0
        # islice stops at `limit` or when the permutations run out,
        # whichever comes first, so no StopIteration handling is needed
        # and the code runs on both Python 2 and Python 3.
        with open(task.get_pdb_seqres_txt(), 'w') as f:
            for perm in itertools.islice(perms, limit):
                f.write('>' + ''.join(perm) +
                        '_A mol:protein length:165 T4 LYSOZYME\n')
                f.write('MVLSEGEWQLVLH\n')
                counter += 1
        pdbset = task.get_set_of_pbdid_from_pdb_seqres_txt()
        self.assertEqual(len(pdbset), counter)
    finally:
        shutil.rmtree(temp_dir)
def test_run_all_compinchi_fail(self):
    """run() sets a download error naming the components inchi file.

    The four TSV files are staged in the source directory, but the
    components inchi file is not, and the expected error message refers
    to params.compinchi and task.get_components_inchi_file().
    """
    temp_dir = tempfile.mkdtemp()
    try:
        params = D3RParameters()
        params.pdbfileurl = 'file://' + temp_dir
        params.compinchi = 'file://' + temp_dir
        make_blast = MakeBlastDBTask(temp_dir, params)
        make_blast.create_dir()
        # Mark the makeblastdb task as complete.
        open(os.path.join(make_blast.get_dir(),
                          D3RTask.COMPLETE_FILE), 'a').close()
        task = DataImportTask(temp_dir, params)
        task._retrysleep = 0
        # Stage empty copies of every TSV file in the source directory.
        for tsv_name in (task.NONPOLYMER_TSV, task.SEQUENCE_TSV,
                         task.OLDSEQUENCE_TSV, task.CRYSTALPH_TSV):
            open(os.path.join(temp_dir, tsv_name), 'a').close()
        task.run()
        self.assertEqual(
            task.get_error(),
            'Unable to download file ' +
            'from ' + params.compinchi + ' to ' +
            task.get_components_inchi_file())
    finally:
        shutil.rmtree(temp_dir)
def test_get_set_of_pdbid_in_crystalph_tsv_and_pdb_seqres_w_hits(self):
    """Only the id present in both input files is returned.

    The crystalph TSV lists five ids and the sequence file lists two;
    '4rfr' is the only one in both, and the test expects it back as
    '4RFR'.
    """
    temp_dir = tempfile.mkdtemp()
    try:
        params = D3RParameters()
        task = DataImportTask(temp_dir, params)
        task.create_dir()
        with open(task.get_crystalph_tsv(), 'w') as f:
            f.write('PDB_ID _exptl_crystal_grow.pH\n')
            f.write('4X09\t6.5\n')
            f.write('4rfr\t8\n')
            f.write('4XET\t6.2\n')
            f.write('4XF1\t6.2\n')
            f.write('4XF3\t6.2\n')

        makeblast = MakeBlastDBTask(temp_dir, params)
        makeblast.create_dir()
        with open(makeblast.get_pdb_seqres_txt(), 'w') as f:
            f.write('>4rfr_A mol:protein length:154 MYOGLOBIN\n')
            f.write('MVLSEGEWQLVLHVWAKVEADVAGHGQDILIRLFKSHPETLEKFDRVK'
                    'HLKTEAEMKASEDLKKHG\n')
            f.write('>102l_A mol:protein length:165 T4 LYSOZYME\n')
            f.write('MNIFEMLRIDEGLRLKIYKDTEGYYTIGIGHLLTKSPSLNAAAKSELDKA'
                    'IGRNTNGVITKDEAEKLFNQDVDAAVRGILRNAKLKPVYDSLDAVRRAAL'
                    'INMVFQMGETGVAGFTNSLRMLQQKRWDEAAVNLAKSRWYNQTPNRAKRV'
                    'ITTFRTGTWDAYKNL\n')

        pdbset = task.get_set_of_pdbid_in_crystalph_tsv_and_pdb_seqres()
        self.assertEqual(len(pdbset), 1)
        self.assertTrue('4RFR' in pdbset)
    finally:
        shutil.rmtree(temp_dir)
def test_can_run_where_task_failed(self):
    """can_run() is False when the task directory holds an 'error' file."""
    scratch = tempfile.mkdtemp()
    try:
        blast_task = MakeBlastDBTask(scratch, D3RParameters())
        blast_task.create_dir()
        error_marker = os.path.join(blast_task.get_dir(), 'error')
        # Create an empty marker file.
        with open(error_marker, 'a'):
            pass
        self.assertEqual(blast_task.can_run(), False)
    finally:
        shutil.rmtree(scratch)
def test_get_set_of_pbdid_from_pdb_seqres_txt_no_file(self):
    """With no sequence file on disk the returned id set is empty."""
    scratch = tempfile.mkdtemp()
    try:
        blast_task = MakeBlastDBTask(scratch, D3RParameters())
        blast_task.create_dir()
        # The sequence file is deliberately never created.
        found_ids = blast_task.get_set_of_pbdid_from_pdb_seqres_txt()
        self.assertEqual(len(found_ids), 0)
    finally:
        shutil.rmtree(scratch)
def test_get_sequence_count_file_has_zero_size(self):
    """A zero-byte sequence file is reported as '# sequence(s): 0'."""
    scratch = tempfile.mkdtemp()
    try:
        blast_task = MakeBlastDBTask(scratch, D3RParameters())
        blast_task.create_dir()
        # Create the sequence file without writing anything to it.
        with open(blast_task.get_pdb_seqres_txt(), 'a'):
            pass
        message = blast_task._get_sequence_count_message()
        self.assertEqual(message, '# sequence(s): 0')
    finally:
        shutil.rmtree(scratch)
def test_get_set_of_pbdid_from_pdb_seqres_txt_empty_file(self):
    """An existing but empty sequence file yields an empty id set."""
    scratch = tempfile.mkdtemp()
    try:
        blast_task = MakeBlastDBTask(scratch, D3RParameters())
        blast_task.create_dir()
        # Create the sequence file without writing anything to it.
        with open(blast_task.get_pdb_seqres_txt(), 'a'):
            pass
        # Confirm the file really exists before parsing it.
        self.assertTrue(os.path.isfile(blast_task.get_pdb_seqres_txt()))
        found_ids = blast_task.get_set_of_pbdid_from_pdb_seqres_txt()
        self.assertEqual(len(found_ids), 0)
    finally:
        shutil.rmtree(scratch)
def test_get_sequence_count_file_has_multiple_seqs(self):
    """Three '>' header lines are reported as '# sequence(s): 3'."""
    temp_dir = tempfile.mkdtemp()
    try:
        params = D3RParameters()
        task = MakeBlastDBTask(temp_dir, params)
        task.create_dir()
        # The context manager closes the file even if the write raises.
        with open(task.get_pdb_seqres_txt(), 'w') as f:
            f.write('>hi\n>seq\n>are\n')
        self.assertEqual(task._get_sequence_count_message(),
                         '# sequence(s): 3')
    finally:
        shutil.rmtree(temp_dir)
def test_get_set_of_pbdid_from_pdb_seqres_txt_file_no_seqs(self):
    """A file with no '>' header lines yields an empty id set."""
    temp_dir = tempfile.mkdtemp()
    try:
        params = D3RParameters()
        task = MakeBlastDBTask(temp_dir, params)
        task.create_dir()
        # The context manager closes the file even if the write raises.
        with open(task.get_pdb_seqres_txt(), 'w') as f:
            f.write('hi\nhow\nare\nyou')
        self.assertTrue(os.path.isfile(task.get_pdb_seqres_txt()))
        pdbset = task.get_set_of_pbdid_from_pdb_seqres_txt()
        self.assertEqual(len(pdbset), 0)
    finally:
        shutil.rmtree(temp_dir)
def test_run_all_success(self):
    """run() finishes without error when every source file is staged.

    All four TSV files and the components inchi file are created empty
    in the source directory.  After run() each TSV is expected to have
    a line count of 1, and the mocked file-transfer connection must
    have been asked for the participant list CSV under '/foo2'.
    """
    temp_dir = tempfile.mkdtemp()
    try:
        # File transfer object whose connection is a stand-in with a
        # mocked get() so no real FTP traffic occurs.
        fakeftp = FtpFileTransfer(None)
        mftp = D3RParameters()
        fakeftp.set_connection(mftp)
        fakeftp.set_remote_dir('/foo2')
        mftp.get = Mock()

        params = D3RParameters()
        params.pdbfileurl = 'file://' + temp_dir
        params.compinchi = 'file://' + temp_dir
        make_blast = MakeBlastDBTask(temp_dir, params)
        make_blast.create_dir()
        # Mark the makeblastdb task as complete.
        open(os.path.join(make_blast.get_dir(),
                          D3RTask.COMPLETE_FILE), 'a').close()

        task = DataImportTask(temp_dir, params)
        task.set_file_transfer(fakeftp)
        task._retrysleep = 0
        # Stage empty copies of every file run() downloads.
        for src_name in (task.NONPOLYMER_TSV, task.SEQUENCE_TSV,
                         task.OLDSEQUENCE_TSV, task.CRYSTALPH_TSV,
                         task.COMPINCHI_ICH):
            open(os.path.join(temp_dir, src_name), 'a').close()

        task.run()
        self.assertEqual(task.get_error(), None)

        # check line count is 1 now which indicates
        # standard was added
        self.assertEqual(
            util.get_file_line_count(task.get_nonpolymer_tsv()), 1)
        self.assertEqual(
            util.get_file_line_count(task.get_sequence_tsv()), 1)
        self.assertEqual(
            util.get_file_line_count(task.get_oldsequence_tsv()), 1)
        self.assertEqual(
            util.get_file_line_count(task.get_crystalph_tsv()), 1)

        mftp.get.assert_called_with(
            '/foo2/' + DataImportTask.PARTICIPANT_LIST_CSV,
            local=task.get_participant_list_csv())
    finally:
        shutil.rmtree(temp_dir)
def test_can_run_does_not_exist_or_error(self):
    """Walk DataImportTask.can_run() through its upstream states.

    Checks, in order: no makeblastdb directory, makeblastdb with an
    error file, makeblastdb with a complete file, and finally the data
    import task's own directory already containing an error file.
    """
    temp_dir = tempfile.mkdtemp()
    try:
        params = D3RParameters()
        task = DataImportTask(temp_dir, params)

        # no make blast db
        self.assertEqual(task.can_run(), False)
        self.assertEqual(task.get_error(),
                         'makeblastdb task has notfound status')
        self.assertEqual(task._can_run, False)

        make_blast = MakeBlastDBTask(temp_dir, params)
        make_blast.create_dir()

        # make blast db failed
        err_file = os.path.join(make_blast.get_dir(),
                                D3RTask.ERROR_FILE)
        open(err_file, 'a').close()
        self.assertEqual(task.can_run(), False)
        self.assertEqual(task.get_error(),
                         'makeblastdb task has error status')
        self.assertEqual(task._can_run, False)
        os.remove(err_file)

        # make blast db success
        open(os.path.join(make_blast.get_dir(),
                          D3RTask.COMPLETE_FILE), 'a').close()
        self.assertEqual(task.can_run(), True)
        self.assertEqual(task.get_error(), None)
        self.assertEqual(task._can_run, True)

        # data import directory already exists with an error file
        task.create_dir()
        open(os.path.join(task.get_dir(),
                          D3RTask.ERROR_FILE), 'a').close()
        self.assertEqual(task.can_run(), False)
        self.assertEqual(task._can_run, False)
        self.assertEqual(
            task.get_error(),
            task.get_dir_name() + ' already exists and ' +
            'status is ' + D3RTask.ERROR_STATUS)
    finally:
        shutil.rmtree(temp_dir)
def test_get_set_of_pbdid_from_pdb_seqres_txt_with_seqs(self):
    """Parse a mixed sequence file and expect four distinct ids.

    The file contains a repeated id ('102l'), two chains of the same
    entry ('104l_A' and '104l_B') and one header whose id is not four
    characters long ('10jj3m').  The expected result is exactly
    101M, 102L, 103L and 104L.
    """
    temp_dir = tempfile.mkdtemp()
    try:
        params = D3RParameters()
        task = MakeBlastDBTask(temp_dir, params)
        task.create_dir()
        with open(task.get_pdb_seqres_txt(), 'w') as f:
            f.write('>101m_A mol:protein length:154 MYOGLOBIN\n')
            f.write('MVLSEGEWQLVLHVWAKVEADVAGHGQDILIRLFKSHPETLEKFDRVK'
                    'HLKTEAEMKASEDLKKHG\n')
            f.write('>102l_A mol:protein length:165 T4 LYSOZYME\n')
            f.write('MNIFEMLRIDEGLRLKIYKDTEGYYTIGIGHLLTKSPSLNAAAKSELDKA'
                    'IGRNTNGVITKDEAEKLFNQDVDAAVRGILRNAKLKPVYDSLDAVRRAAL'
                    'INMVFQMGETGVAGFTNSLRMLQQKRWDEAAVNLAKSRWYNQTPNRAKRV'
                    'ITTFRTGTWDAYKNL\n')
            f.write('>102l_A mol:protein length:154 MYOGLOBIN\n')
            f.write('MVLSEGEWQLVLHVWAKVEADVAGHGQDILIRLFKSHPETLEKFDRFKHL'
                    'KTEAEMKASEDLKKAGVTVLTALGAILKKKGHHEAELKPLAQSHATKHKI'
                    'PIKYLEFISEAIIHVLHSRHPGNFGADAQGAMNKALELFRKDIAAKYKELGYQG\n')
            f.write('>103l_A mol:protein length:167 T4 LYSOZYME\n')
            f.write('MNIFEMLRIDEGLRLKIYKDTEGYYTIGIGHLLTKSPSLNSLDAAKSELD'
                    'KAIGRNTNGVITKDEAEKLFNQDVDAAVRGILRNAKLKPVYDSLDAVRRA'
                    'ALINMVFQMGETGVAGFTNSLRMLQQKRWDEAAVNLAKSRWYNQTPNRAK'
                    'RVITTFRTGTWDAYKNL\n')
            f.write('>10jj3m_A mol:protein length:154 MYOGLOBIN\n')
            f.write('MVLSEGEWQLVLHVWAKVEADVAGHGQDILIRLFKSHPETLEKFDRF\n')
            f.write('>104l_A mol:protein length:166 T4 LYSOZYME\n')
            f.write('MNIFEMLRIDEGLRLKIYKDTEGYYTIGIGHLLTKSPSLNAAKSAAE\n')
            f.write('>104l_B mol:protein length:166 T4 LYSOZYME\n')
            f.write('MNIFEMLRIDEGLRLKIYKDTEGYYTIGIGHLLTKSPSLNAAKSAKNL\n')

        self.assertTrue(os.path.isfile(task.get_pdb_seqres_txt()))
        pdbset = task.get_set_of_pbdid_from_pdb_seqres_txt()
        self.assertEqual(len(pdbset), 4)
        for expected_id in ('101M', '102L', '103L', '104L'):
            self.assertTrue(expected_id in pdbset)
    finally:
        shutil.rmtree(temp_dir)
def test_run_all_nonpolymer_fail(self):
    """run() sets a download error naming the nonpolymer TSV.

    No source files are staged, and the expected error message refers
    to params.pdbfileurl and task.get_nonpolymer_tsv().
    """
    temp_dir = tempfile.mkdtemp()
    try:
        params = D3RParameters()
        params.pdbfileurl = 'file://' + temp_dir
        params.compinchi = 'file://' + temp_dir
        make_blast = MakeBlastDBTask(temp_dir, params)
        make_blast.create_dir()
        # Mark the makeblastdb task as complete.
        open(os.path.join(make_blast.get_dir(),
                          D3RTask.COMPLETE_FILE), 'a').close()
        task = DataImportTask(temp_dir, params)
        task._retrysleep = 0
        task.run()
        self.assertEqual(
            task.get_error(),
            'Unable to download file ' +
            'from ' + params.pdbfileurl + ' to ' +
            task.get_nonpolymer_tsv())
    finally:
        shutil.rmtree(temp_dir)
def test_run_all_success_except_participant_download_fails(self):
    """run() succeeds but the email log opens with a download warning.

    All TSV files and the components inchi file are staged, and no file
    transfer object is set on the task.  The test expects no task
    error, a line count of 1 for each TSV, and an email log that starts
    with '\\nWARNING: Unable to download'.
    """
    temp_dir = tempfile.mkdtemp()
    try:
        params = D3RParameters()
        params.pdbfileurl = 'file://' + temp_dir
        params.compinchi = 'file://' + temp_dir
        make_blast = MakeBlastDBTask(temp_dir, params)
        make_blast.create_dir()
        # Mark the makeblastdb task as complete.
        open(os.path.join(make_blast.get_dir(),
                          D3RTask.COMPLETE_FILE), 'a').close()

        task = DataImportTask(temp_dir, params)
        task._retrysleep = 0
        # Stage empty copies of every file run() downloads.
        for src_name in (task.NONPOLYMER_TSV, task.SEQUENCE_TSV,
                         task.OLDSEQUENCE_TSV, task.CRYSTALPH_TSV,
                         task.COMPINCHI_ICH):
            open(os.path.join(temp_dir, src_name), 'a').close()

        task.run()
        self.assertEqual(task.get_error(), None)

        # check line count is 1 now which indicates
        # standard was added
        self.assertEqual(
            util.get_file_line_count(task.get_nonpolymer_tsv()), 1)
        self.assertEqual(
            util.get_file_line_count(task.get_sequence_tsv()), 1)
        self.assertEqual(
            util.get_file_line_count(task.get_oldsequence_tsv()), 1)
        self.assertEqual(
            util.get_file_line_count(task.get_crystalph_tsv()), 1)
        self.assertTrue(task.get_email_log().startswith(
            '\nWARNING: Unable to download'))
    finally:
        shutil.rmtree(temp_dir)
def test_get_uploadable_files(self):
    """Check get_uploadable_files() as files appear in the task dir.

    Covers an empty directory, the stdout/stderr files, the gzipped
    sequence file, and the case where the gzipped sequence path is a
    directory instead of a file.
    """
    temp_dir = tempfile.mkdtemp()
    try:
        params = D3RParameters()
        task = MakeBlastDBTask(temp_dir, params)
        task.create_dir()

        # test on empty dir
        self.assertEqual(task.get_uploadable_files(), [])

        # test with stderr/stdout files
        stdout = os.path.join(task.get_dir(), 'makeblastdb.stdout')
        open(stdout, 'a').close()
        stderr = os.path.join(task.get_dir(), 'makeblastdb.stderr')
        open(stderr, 'a').close()
        flist = task.get_uploadable_files()
        self.assertEqual(len(flist), 2)
        # Explicit membership assertions report a test failure instead
        # of the ValueError a bare flist.index() call would raise.
        self.assertTrue(stdout in flist)
        self.assertTrue(stderr in flist)

        # test with pdb_seqres.txt.gz file
        open(task.get_pdb_seqres_txt_gz(), 'a').close()
        flist = task.get_uploadable_files()
        self.assertEqual(len(flist), 3)
        self.assertTrue(stdout in flist)
        self.assertTrue(stderr in flist)
        self.assertTrue(task.get_pdb_seqres_txt_gz() in flist)

        os.unlink(task.get_pdb_seqres_txt_gz())

        # test where the pdb_seqres.txt.gz path is a dir,
        # unlikely but why not check
        os.makedirs(task.get_pdb_seqres_txt_gz())
        flist = task.get_uploadable_files()
        self.assertEqual(len(flist), 2)
        self.assertTrue(stdout in flist)
        self.assertTrue(stderr in flist)
    finally:
        shutil.rmtree(temp_dir)
def test_get_set_of_pbdid_from_pdb_seqres_txt_wrong_len_pdbids(self):
    """Headers whose id is not four characters long yield an empty set.

    The file holds one header with a 2-character id ('1m') and one
    with a 6-character id ('abcdel').
    """
    temp_dir = tempfile.mkdtemp()
    try:
        params = D3RParameters()
        task = MakeBlastDBTask(temp_dir, params)
        task.create_dir()
        with open(task.get_pdb_seqres_txt(), 'w') as f:
            f.write('>1m_A mol:protein length:154 MYOGLOBIN\n')
            f.write('MVLSEGEWQLVLHVWAKVEADVAGHGQDILIRLFKSHPETLEKFDRVK'
                    'HLKTEAEMKASEDLKKHG\n')
            f.write('>abcdel_A mol:protein length:165 T4 LYSOZYME\n')
            f.write('MNIFEMLRIDEGLRLKIYKDTEGYYTIGIGHLLTKSPSLNAAAKSELDKA'
                    'IGRNTNGVITKDEAEKLFNQDVDAAVRGILRNAKLKPVYDSLDAVRRAAL'
                    'INMVFQMGETGVAGFTNSLRMLQQKRWDEAAVNLAKSRWYNQTPNRAKRV'
                    'ITTFRTGTWDAYKNL\n')

        self.assertTrue(os.path.isfile(task.get_pdb_seqres_txt()))
        pdbset = task.get_set_of_pbdid_from_pdb_seqres_txt()
        self.assertEqual(len(pdbset), 0)
    finally:
        shutil.rmtree(temp_dir)
def test_get_set_of_pdbid_in_crystalph_tsv_and_pdb_seqres_empty_seq(self):
    """A populated crystalph TSV and an empty sequence file give no ids."""
    temp_dir = tempfile.mkdtemp()
    try:
        params = D3RParameters()
        task = DataImportTask(temp_dir, params)
        task.create_dir()
        with open(task.get_crystalph_tsv(), 'w') as f:
            f.write('PDB_ID _exptl_crystal_grow.pH\n')
            f.write('4X09\t6.5\n')
            f.write('4rfr\t8\n')
            f.write('4XET\t6.2\n')
            f.write('4XF1\t6.2\n')
            f.write('4XF3\t6.2\n')

        makeblast = MakeBlastDBTask(temp_dir, params)
        makeblast.create_dir()
        # Create the sequence file without writing anything to it.
        open(makeblast.get_pdb_seqres_txt(), 'a').close()

        pdbset = task.get_set_of_pdbid_in_crystalph_tsv_and_pdb_seqres()
        self.assertEqual(len(pdbset), 0)
    finally:
        shutil.rmtree(temp_dir)
def test_can_run(self):
    """Walk BlastNFilterTask.can_run() through its upstream states.

    Marker files are added step by step for makeblastdb, dataimport
    and finally the blast task itself; after each step the result of
    can_run() and, where checked, get_error() is verified.
    """
    work_dir = tempfile.mkdtemp()
    try:
        # try where makeblastdb is not complete
        params = D3RParameters()
        blast_task = BlastNFilterTask(work_dir, params)
        self.assertEqual(blast_task.can_run(), False)

        # try where makeblastdb failed
        blast_db = MakeBlastDBTask(work_dir, params)
        blast_db.create_dir()
        error_path = os.path.join(blast_db.get_path(),
                                  blast_db.get_dir_name(),
                                  D3RTask.ERROR_FILE)
        with open(error_path, 'a'):
            pass
        self.assertEqual(blast_task.can_run(), False)
        self.assertEqual(blast_task.get_error(),
                         'makeblastdb task has error status')

        # try where data import is not complete
        complete_path = os.path.join(blast_db.get_path(),
                                     blast_db.get_dir_name(),
                                     D3RTask.COMPLETE_FILE)
        with open(complete_path, 'a'):
            pass
        self.assertEqual(blast_task.can_run(), False)
        self.assertEqual(blast_task.get_error(),
                         'dataimport task has ' +
                         'notfound status')

        # try where data import failed
        data_import = DataImportTask(work_dir, params)
        data_import.create_dir()
        error_path = os.path.join(data_import.get_path(),
                                  data_import.get_dir_name(),
                                  D3RTask.ERROR_FILE)
        with open(error_path, 'a'):
            pass
        self.assertEqual(blast_task.can_run(), False)
        self.assertEqual(blast_task.get_error(),
                         'dataimport task has error status')

        # try where blast can run
        os.remove(error_path)
        complete_path = os.path.join(data_import.get_dir(),
                                     D3RTask.COMPLETE_FILE)
        with open(complete_path, 'a'):
            pass
        self.assertEqual(blast_task.can_run(), True)
        self.assertEqual(blast_task.get_error(), None)

        # try where blast exists
        blast_task.create_dir()
        self.assertEqual(blast_task.can_run(), False)
        self.assertEqual(
            blast_task.get_error(),
            blast_task.get_dir_name() + ' already exists and' +
            ' status is unknown')

        # try where blast is complete
        complete_path = os.path.join(blast_task.get_path(),
                                     blast_task.get_dir_name(),
                                     D3RTask.COMPLETE_FILE)
        with open(complete_path, 'a'):
            pass
        self.assertEqual(blast_task.can_run(), False)
        self.assertEqual(blast_task.get_error(), None)
    finally:
        shutil.rmtree(work_dir)