Ejemplo n.º 1
0
    def test_run_success_with_ftp_upload(self):
        """Run ChallengeDataTask with a mocked ftp connection attached.

        The test writes the three DataImportTask tsv files, runs the
        task and expects no error, no error file and a complete file
        in the task directory.
        """
        temp_dir = tempfile.mkdtemp()
        try:
            script = self.create_gen_challenge_script(temp_dir)
            params = D3RParameters()
            params.genchallenge = script
            params.pdbdb = '/foo'
            params.version = '1'

            # drop a COMPLETE_FILE marker into the blastnfilter task
            # directory (presumably what lets ChallengeDataTask run --
            # confirm against ChallengeDataTask.can_run)
            blastnfilter = BlastNFilterTask(temp_dir, params)
            blastnfilter.create_dir()
            open(os.path.join(blastnfilter.get_dir(), D3RTask.COMPLETE_FILE),
                 'a').close()

            chall = ChallengeDataTask(temp_dir, params)

            # stand-in connection object; successive put() calls
            # return 3 and then 5
            mockftp = D3RParameters()
            mockftp.put = Mock(side_effect=[3, 5])
            ftp = FtpFileTransfer(None)
            ftp.set_remote_challenge_dir('/challenge')
            ftp.set_connection(mockftp)
            chall.set_file_transfer(ftp)

            dimport = DataImportTask(temp_dir, params)
            dimport.create_dir()

            # 'with' guarantees each handle is closed even if the
            # write itself raises
            for tsv_path, content in (
                    (dimport.get_crystalph_tsv(), 'crystal'),
                    (dimport.get_nonpolymer_tsv(), 'nonpoly'),
                    (dimport.get_sequence_tsv(), 'seq')):
                with open(tsv_path, 'w') as f:
                    f.write(content)

            chall.run()
            self.assertEqual(chall.get_error(), None)

            # verify test files get created
            errfile = os.path.join(chall.get_dir(),
                                   D3RTask.ERROR_FILE)
            self.assertFalse(os.path.isfile(errfile))

            compfile = os.path.join(chall.get_dir(),
                                    D3RTask.COMPLETE_FILE)
            self.assertTrue(os.path.isfile(compfile))
            ftp.disconnect()

        finally:
            shutil.rmtree(temp_dir)
Ejemplo n.º 2
0
    def test_get_uploadable_files(self):
        """Check get_uploadable_files() as data import files appear.

        The list must be empty before create_dir() is called and for
        an empty task directory, then grow by exactly one entry each
        time one of the four expected files is created.
        """
        temp_dir = tempfile.mkdtemp()
        try:
            params = D3RParameters()
            task = DataImportTask(temp_dir, params)
            # before create_dir() has been called
            self.assertEqual(task.get_uploadable_files(), [])

            task.create_dir()
            # test empty dir
            self.assertEqual(task.get_uploadable_files(), [])

            # create the files one at a time, in the same order the
            # original test used; after each creation every file made
            # so far must be reported and nothing else
            created = []
            for path in (task.get_components_inchi_file(),
                         task.get_crystalph_tsv(),
                         task.get_nonpolymer_tsv(),
                         task.get_sequence_tsv()):
                open(path, 'a').close()
                created.append(path)

                flist = task.get_uploadable_files()
                self.assertEqual(len(flist), len(created))
                for wanted in created:
                    # assertIn reports a proper failure with both
                    # operands instead of a bare ValueError
                    self.assertIn(wanted, flist)

        finally:
            shutil.rmtree(temp_dir)
Ejemplo n.º 3
0
    def test_append_standard_to_files(self):
        """Exercise append_standard_to_files() on good and bad targets.

        The first call must leave the nonpolymer, sequence and crystalph
        tsv files on disk.  The second call is made after the nonpolymer
        tsv path has been replaced by a directory; the test only requires
        that this call does not raise.
        """
        temp_dir = tempfile.mkdtemp()
        try:
            task = DataImportTask(temp_dir, D3RParameters())
            task.create_dir()
            task.append_standard_to_files()

            for tsv_getter in (task.get_nonpolymer_tsv,
                               task.get_sequence_tsv,
                               task.get_crystalph_tsv):
                self.assertTrue(os.path.isfile(tsv_getter()))

            # now do it again, but this time make the append fail
            # because the nonpolymer tsv is a directory
            os.unlink(task.get_nonpolymer_tsv())
            os.makedirs(task.get_nonpolymer_tsv())
            task.append_standard_to_files()

        finally:
            shutil.rmtree(temp_dir)
Ejemplo n.º 4
0
    def test_copy_over_tsv_files(self):
        """Verify _copy_over_tsv_files() copies all three tsv files.

        Each DataImportTask tsv file is written with distinct content,
        then the copies in the challenge directory are checked for
        existence and for that same content.
        """
        temp_dir = tempfile.mkdtemp()
        try:
            params = D3RParameters()
            dimport = DataImportTask(temp_dir, params)
            dimport.create_dir()

            for src_path, content in (
                    (dimport.get_crystalph_tsv(), 'crystal'),
                    (dimport.get_nonpolymer_tsv(), 'nonpoly'),
                    (dimport.get_sequence_tsv(), 'seq')):
                with open(src_path, 'w') as f:
                    f.write(content)

            task = ChallengeDataTask(temp_dir, params)
            task.create_dir()
            challenge_dir = task._create_challenge_dir()
            self.assertTrue(os.path.isdir(challenge_dir))
            task._copy_over_tsv_files(challenge_dir)

            for tsv_name, content in (
                    (DataImportTask.CRYSTALPH_TSV, 'crystal'),
                    (DataImportTask.NONPOLYMER_TSV, 'nonpoly'),
                    (DataImportTask.SEQUENCE_TSV, 'seq')):
                copied = os.path.join(challenge_dir, tsv_name)
                self.assertTrue(os.path.isfile(copied), copied)
                # 'with' closes the handle even when the assertion
                # below fails
                with open(copied) as f:
                    self.assertEqual(f.readline(), content)

        finally:
            shutil.rmtree(temp_dir)
Ejemplo n.º 5
0
    def _copy_over_tsv_files(self, challenge_dir):
        """Copies over tsv files from `DataImportTask`

        The crystalph, nonpolymer and sequence tsv files of a
        `DataImportTask` built from this task's path and args are
        copied into `challenge_dir` under their standard file names.
        A source that is not a regular file is skipped with a warning;
        it is not treated as an error.

        :param challenge_dir: directory to copy the tsv files into
        """
        dataimport = DataImportTask(self.get_path(), self.get_args())

        # (source path, file name to use inside challenge_dir)
        tsv_files = (
            (dataimport.get_crystalph_tsv(), DataImportTask.CRYSTALPH_TSV),
            (dataimport.get_nonpolymer_tsv(), DataImportTask.NONPOLYMER_TSV),
            (dataimport.get_sequence_tsv(), DataImportTask.SEQUENCE_TSV),
        )

        for src, dest_name in tsv_files:
            dest = os.path.join(challenge_dir, dest_name)
            if os.path.isfile(src):
                # note the space before 'to'; it was missing, which
                # glued the source path to the word in the log output
                logger.debug('Copying over %s to %s', src, dest)
                shutil.copyfile(src, dest)
            else:
                logger.warning('%s file does not exist', src)
Ejemplo n.º 6
0
    def test_run_all_success(self):
        """Run DataImportTask with every download source present.

        Empty source files are placed in temp_dir, which the
        'file://' urls in the parameters point at, and a mocked ftp
        connection is attached.  After run() the task must report no
        error, each tsv file must hold exactly one line and the
        participant list must have been requested through the ftp
        connection.
        """
        temp_dir = tempfile.mkdtemp()
        try:
            fakeftp = FtpFileTransfer(None)
            mftp = D3RParameters()

            fakeftp.set_connection(mftp)
            fakeftp.set_remote_dir('/foo2')
            mftp.get = Mock()

            params = D3RParameters()
            params.pdbfileurl = 'file://' + temp_dir
            params.compinchi = 'file://' + temp_dir

            make_blast = MakeBlastDBTask(temp_dir, params)
            make_blast.create_dir()
            open(os.path.join(make_blast.get_dir(), D3RTask.COMPLETE_FILE),
                 'a').close()

            task = DataImportTask(temp_dir, params)
            task.set_file_transfer(fakeftp)
            task._retrysleep = 0
            for src_name in (task.NONPOLYMER_TSV,
                             task.SEQUENCE_TSV,
                             task.OLDSEQUENCE_TSV,
                             task.CRYSTALPH_TSV,
                             task.COMPINCHI_ICH):
                open(os.path.join(temp_dir, src_name), 'a').close()

            task.run()
            # assertEqual, not the deprecated assertEquals alias which
            # no longer exists in Python 3.12+
            self.assertEqual(task.get_error(), None)

            # check line count is 1 now which indicates
            # standard was added
            for tsv_getter in (task.get_nonpolymer_tsv,
                               task.get_sequence_tsv,
                               task.get_oldsequence_tsv,
                               task.get_crystalph_tsv):
                self.assertEqual(
                    util.get_file_line_count(tsv_getter()), 1)

            mftp.get.assert_called_with('/foo2/' +
                                        DataImportTask.PARTICIPANT_LIST_CSV,
                                        local=task.get_participant_list_csv())
        finally:
            shutil.rmtree(temp_dir)
Ejemplo n.º 7
0
    def test_tar_challenge_dir(self):
        """Verify the tarball produced by _tar_challenge_dir().

        A challenge directory is populated with a readme, the tsv
        files, a fake candidate and a final.log file.  The tarball is
        then extracted and must contain the readme and the candidate
        files but not final.log.
        """
        temp_dir = tempfile.mkdtemp()
        try:
            params = D3RParameters()
            dimport = DataImportTask(temp_dir, params)
            dimport.create_dir()
            open(dimport.get_sequence_tsv(), 'a').close()
            open(dimport.get_nonpolymer_tsv(), 'a').close()
            open(dimport.get_crystalph_tsv(), 'a').close()

            task = ChallengeDataTask(temp_dir, params)
            task.create_dir()
            chall_dir = task._create_challenge_dir()

            final_log = os.path.join(chall_dir, 'final.log')
            open(final_log, 'a').close()

            task._create_readme(chall_dir)
            task._copy_over_tsv_files(chall_dir)

            # make a fake candidate; the returned names are joined to
            # the extracted challenge dir below, so they are presumably
            # relative to chall_dir -- confirm against the helper
            file_list = self.make_fake_candidate_dir(chall_dir, '5hib',
                                                     '2eb2', 'CSX')

            self.assertTrue(os.path.isdir(chall_dir))
            name = task.get_celpp_challenge_data_dir_name()
            tfile = task._tar_challenge_dir(name)
            self.assertTrue(os.path.isfile(tfile))

            foodir = os.path.join(temp_dir, 'foo')
            # context manager closes the archive even if extraction fails
            with tarfile.open(tfile, 'r:*') as tar:
                tar.extractall(path=foodir)

            cdir = os.path.join(foodir, name)
            readme = os.path.join(cdir, ChallengeDataTask.README_TXT_FILE)
            self.assertTrue(os.path.isfile(readme))

            # final.log lives inside the challenge dir, so that is where
            # its absence has to be checked; looking directly under
            # foodir could never fail
            final_log = os.path.join(cdir, 'final.log')
            self.assertFalse(os.path.isfile(final_log), final_log)

            for fname in file_list:
                chk = os.path.join(cdir, fname)
                self.assertTrue(os.path.isfile(chk), chk)

        finally:
            shutil.rmtree(temp_dir)
Ejemplo n.º 8
0
    def test_run_all_nonpolymer_fail(self):
        """Run DataImportTask when no source file exists to download.

        No files are placed in temp_dir, which the 'file://' urls
        point at, and the task is expected to report that the
        nonpolymer tsv could not be downloaded.
        """
        temp_dir = tempfile.mkdtemp()
        try:
            params = D3RParameters()
            params.pdbfileurl = 'file://' + temp_dir
            params.compinchi = 'file://' + temp_dir

            make_blast = MakeBlastDBTask(temp_dir, params)
            make_blast.create_dir()
            open(os.path.join(make_blast.get_dir(), D3RTask.COMPLETE_FILE),
                 'a').close()

            task = DataImportTask(temp_dir, params)
            task._retrysleep = 0
            task.run()

            expected_error = ('Unable to download file ' + 'from ' +
                              params.pdbfileurl + ' to ' +
                              task.get_nonpolymer_tsv())
            # assertEqual, not the deprecated assertEquals alias which
            # no longer exists in Python 3.12+
            self.assertEqual(task.get_error(), expected_error)
        finally:
            shutil.rmtree(temp_dir)
Ejemplo n.º 9
0
    def test_run_all_success_except_participant_download_fails(self):
        """Run DataImportTask without any file transfer object set.

        All download sources exist, so the task must finish without
        error and each tsv file must hold exactly one line.  No file
        transfer is attached, and the email log is expected to start
        with a warning about a download that could not be done.
        """
        temp_dir = tempfile.mkdtemp()
        try:
            params = D3RParameters()
            params.pdbfileurl = 'file://' + temp_dir
            params.compinchi = 'file://' + temp_dir

            make_blast = MakeBlastDBTask(temp_dir, params)
            make_blast.create_dir()
            open(os.path.join(make_blast.get_dir(), D3RTask.COMPLETE_FILE),
                 'a').close()

            task = DataImportTask(temp_dir, params)
            task._retrysleep = 0
            for src_name in (task.NONPOLYMER_TSV,
                             task.SEQUENCE_TSV,
                             task.OLDSEQUENCE_TSV,
                             task.CRYSTALPH_TSV,
                             task.COMPINCHI_ICH):
                open(os.path.join(temp_dir, src_name), 'a').close()

            task.run()
            # assertEqual, not the deprecated assertEquals alias which
            # no longer exists in Python 3.12+
            self.assertEqual(task.get_error(), None)

            # check line count is 1 now which indicates
            # standard was added
            for tsv_getter in (task.get_nonpolymer_tsv,
                               task.get_sequence_tsv,
                               task.get_oldsequence_tsv,
                               task.get_crystalph_tsv):
                self.assertEqual(
                    util.get_file_line_count(tsv_getter()), 1)

            self.assertTrue(task.get_email_log().startswith(
                '\nWARNING: Unable to download'))
        finally:
            shutil.rmtree(temp_dir)
Ejemplo n.º 10
0
    def run(self):
        """Runs the blastnfilter and postanalysis scripts.

           The parent class run() is invoked first.  If the _can_run
           flag is False afterwards the method logs that fact and
           returns without doing anything else.

           Otherwise the blastnfilter script named in the arguments
           is run against the files of the data import task and the
           directory of the make blastdb task, writing to this task's
           directory.  The status is set to D3RTask.COMPLETE_STATUS,
           the postanalysis script is run, hit statistics parsed from
           the blastnfilter output are appended to the email log when
           available, and finally end() is invoked.

           Reads blastnfilter, postanalysis and pdbdb from the task
           arguments, plus loglevel which is optional and defaults to
           'WARNING'.
           """
        super(BlastNFilterTask, self).run()

        # parent run() is expected to have set this flag -- nothing
        # to do when the task was found not runnable
        if self._can_run is False:
            logger.debug(self.get_dir_name() +
                         ' cannot run cause _can_run flag '
                         'is False')
            return

        # these task objects are only used to look up paths below
        data_import = DataImportTask(self._path, self._args)

        make_blastdb = MakeBlastDBTask(self._path, self._args)

        try:
            loglevel = self.get_args().loglevel
        except AttributeError:
            logger.debug('No log level set in arguments using WARNING')
            loglevel = 'WARNING'

        # verify sequence.tsv file exists on filesystem.
        # if not fall back to oldsequence.tsv file
        sequencetsv = data_import.get_sequence_tsv()
        if not os.path.isfile(sequencetsv):
            logger.warning(sequencetsv + ' file not found. falling '
                           'back to old file')
            self.append_to_email_log('\n ' + sequencetsv + ' file not found ' +
                                     'falling back to ' +
                                     data_import.get_oldsequence_tsv() + '\n')
            sequencetsv = data_import.get_oldsequence_tsv()

        # NOTE(review): the command is built by plain string
        # concatenation, so paths containing spaces would presumably
        # break it -- confirm how run_external_command splits it
        cmd_to_run = (self.get_args().blastnfilter + ' --nonpolymertsv ' +
                      data_import.get_nonpolymer_tsv() + ' --sequencetsv ' +
                      sequencetsv + ' --pdbblastdb ' + make_blastdb.get_dir() +
                      ' --compinchi ' +
                      data_import.get_components_inchi_file() +
                      ' --crystalpH ' + data_import.get_crystalph_tsv() +
                      ' --pdbdb ' + self.get_args().pdbdb + ' --log ' +
                      loglevel + ' --outdir ' + self.get_dir())

        blastnfilter_name = os.path.basename(self.get_args().blastnfilter)

        # NOTE(review): meaning of the third positional argument (False)
        # is defined by run_external_command, not visible here
        self.run_external_command(
            blastnfilter_name,
            cmd_to_run,
            False,
        )

        # status is set to complete before postanalysis is run
        self.set_status(D3RTask.COMPLETE_STATUS)

        cmd_to_run = (self.get_args().postanalysis + ' --compinchi ' +
                      data_import.get_components_inchi_file() + ' ' +
                      self.get_dir())

        postanalysis_name = os.path.basename(self.get_args().postanalysis)

        self.run_external_command(postanalysis_name, cmd_to_run, False)

        try:
            # examine output to get candidate hit count DR-12
            hit_stats = self._parse_blastnfilter_output_for_hit_stats()
            if hit_stats is not None:
                self.append_to_email_log(hit_stats)
        except Exception:
            # best effort only: a failure to gather hit statistics is
            # logged and does not stop the task from ending normally
            logger.exception("Error caught exception")

        # assess the result
        self.end()
Ejemplo n.º 11
0
 def test_get_nonpolymer_tsv(self):
     """Check the nonpolymer tsv path is built under the task directory."""
     task = DataImportTask('/foo', D3RParameters())
     actual = task.get_nonpolymer_tsv()
     expected = ('/foo/' + task.get_dir_name() +
                 '/new_release_structure_nonpolymer.tsv')
     self.assertEqual(actual, expected)
Ejemplo n.º 12
0
    def test_run_succeeds_no_ftp(self):
        """Run ChallengeDataTask without a file transfer object.

        After run() the task directory must hold the complete file
        and the genchallenge stdout/stderr files, the challenge
        directory must hold the readme, the tsv files and the 5hib
        and 5hic candidate files, and the tarball must extract to a
        directory containing the readme but neither final.log nor an
        error_container directory.
        """
        temp_dir = tempfile.mkdtemp()
        try:
            script = self.create_gen_challenge_script(temp_dir)
            params = D3RParameters()
            params.genchallenge = script
            params.pdbdb = '/foo'
            params.version = '1'

            blastnfilter = BlastNFilterTask(temp_dir, params)
            blastnfilter.create_dir()
            open(os.path.join(blastnfilter.get_dir(), D3RTask.COMPLETE_FILE),
                 'a').close()
            chall = ChallengeDataTask(temp_dir, params)

            dimport = DataImportTask(temp_dir, params)
            dimport.create_dir()

            # 'with' guarantees each handle is closed even if the
            # write itself raises
            for tsv_path, content in (
                    (dimport.get_crystalph_tsv(), 'crystal'),
                    (dimport.get_nonpolymer_tsv(), 'nonpoly'),
                    (dimport.get_sequence_tsv(), 'seq')):
                with open(tsv_path, 'w') as f:
                    f.write(content)

            chall.run()
            self.assertEqual(chall.get_error(), None)

            # verify test files get created
            errfile = os.path.join(chall.get_dir(),
                                   D3RTask.ERROR_FILE)
            self.assertFalse(os.path.isfile(errfile))

            for fname in (D3RTask.COMPLETE_FILE,
                          'genchallenge.py.stderr',
                          'genchallenge.py.stdout'):
                produced = os.path.join(chall.get_dir(), fname)
                self.assertTrue(os.path.isfile(produced), produced)

            # verify challenge directory is created and
            # filled with valid files
            chall_dir = os.path.join(chall.get_dir(),
                                     chall.get_celpp_challenge_data_dir_name())
            self.assertTrue(os.path.isdir(chall_dir))

            for fname in (ChallengeDataTask.README_TXT_FILE,
                          DataImportTask.CRYSTALPH_TSV,
                          DataImportTask.SEQUENCE_TSV,
                          DataImportTask.NONPOLYMER_TSV):
                produced = os.path.join(chall_dir, fname)
                self.assertTrue(os.path.isfile(produced), produced)

            # each candidate gets its own directory holding a .txt
            # and a .fasta file named after the candidate
            for candidate in ('5hib', '5hic'):
                for suffix in ('.txt', '.fasta'):
                    produced = os.path.join(chall_dir, candidate,
                                            candidate + suffix)
                    self.assertTrue(os.path.isfile(produced), produced)

            # verify tarball is created
            tfile = chall.get_celpp_challenge_data_tar_file()
            self.assertTrue(os.path.isfile(tfile))

            foodir = os.path.join(temp_dir, 'foo')
            os.mkdir(foodir)

            # context manager closes the archive even if extraction fails
            with tarfile.open(tfile, 'r:*') as tar:
                tar.extractall(path=foodir)

            name = chall.get_celpp_challenge_data_dir_name()
            cdir = os.path.join(foodir, name)
            readme = os.path.join(cdir, ChallengeDataTask.README_TXT_FILE)
            self.assertTrue(os.path.isfile(readme))

            final = os.path.join(foodir, name, 'final.log')
            self.assertFalse(os.path.isfile(final))

            e_con = os.path.join(foodir, name, 'error_container')
            self.assertFalse(os.path.isdir(e_con))

        finally:
            shutil.rmtree(temp_dir)