예제 #1
0
 def collect_result(self, session):
     """Harvest finished workers from the processing queue.

     Every ``(worker, (submission_id, submission_name))`` entry currently
     held by ``self._processing_worker_queue`` is pulled out. Workers whose
     status is ``'running'`` are put straight back. For any other worker the
     results are collected, the submission state is set to ``'tested'`` when
     the return code is falsy and to ``'training_error'`` otherwise, the
     collected stderr is stored as the submission error message, the
     submission is pushed onto ``self._processed_submission_queue`` and the
     worker is torn down.

     When nothing could be pulled from the queue, a message is logged and
     ``self.hunger_policy`` decides what happens next: ``'sleep'`` waits
     five seconds, ``'exit'`` sets ``self._poison_pill`` to ``True``; any
     other value does nothing. The method then returns.

     Parameters
     ----------
     session :
         Forwarded unchanged to ``set_submission_state`` and
         ``set_submission_error_msg``.
     """
     processing = self._processing_worker_queue
     try:
         # With nothing drained, zip(*[]) yields no items and the two-name
         # unpacking raises ValueError: that is the "queue was empty" signal.
         workers, submissions = zip(
             *[processing.get() for _ in range(processing.qsize())])
     except ValueError:
         logger.info('No workers are currently waiting or processed.')
         policy = self.hunger_policy
         if policy == 'exit':
             self._poison_pill = True
         elif policy == 'sleep':
             time.sleep(5)
         return

     for worker, submission in zip(workers, submissions):
         submission_id, submission_name = submission

         if worker.status == 'running':
             # Not done yet: hand the entry back for a later pass.
             processing.put_nowait(
                 (worker, (submission_id, submission_name)))
             logger.info('Worker {} is still running'.format(worker))
             # Zero-length sleep; presumably meant to yield to other threads.
             time.sleep(0)
             continue

         logger.info('Collecting results from worker {}'.format(worker))
         returncode, stderr = worker.collect_results()
         new_state = 'training_error' if returncode else 'tested'
         set_submission_state(session, submission_id, new_state)
         set_submission_error_msg(session, submission_id, stderr)
         self._processed_submission_queue.put_nowait(
             (submission_id, submission_name))
         worker.teardown()
예제 #2
0
def test_check_submission_error_msg(session_scope_module):
    """Round-trip an error message through the setter and the getter.

    Exercises both ``set_submission_error_msg`` and
    ``get_submission_error_msg`` on the submission with id 1.
    """
    target_id = 1
    message = 'tag submission as failed'

    set_submission_error_msg(session_scope_module, target_id, message)
    stored = get_submission_error_msg(session_scope_module, target_id)

    assert stored == message
예제 #3
0
    def collect_result(self, session):
        """Drain the processing queue and act on each worker's state.

        Every ``(worker, (submission_id, submission_name))`` entry currently
        held by ``self._processing_worker_queue`` is pulled out and handled
        as follows:

        * put back untouched when ``worker.time_since_last_status_check()``
          is not ``None`` and is below ``self.time_between_collection``;
        * put back when the worker status is ``'running'``;
        * when the status is ``'retry'``, the submission state is reset to
          ``'new'`` and the worker is torn down;
        * otherwise the results are collected, the submission state and
          error message are stored, the submission is pushed onto
          ``self._processed_submission_queue`` and the worker is torn down.

        The stored state depends on the collected return code: falsy gives
        ``'tested'``, ``2`` gives ``'checking_error'`` and every other value
        (including ``124``, logged as a timeout) gives ``'training_error'``.

        When nothing could be pulled from the queue, ``self.hunger_policy``
        decides what happens: ``'sleep'`` waits five seconds, ``'exit'``
        sets ``self._poison_pill`` to ``True``; the method then returns.

        Parameters
        ----------
        session :
            Forwarded unchanged to ``set_submission_state`` and
            ``set_submission_error_msg``.
        """
        processing = self._processing_worker_queue
        try:
            # With nothing drained, zip(*[]) yields no items and the two-name
            # unpacking raises ValueError: the "queue was empty" signal.
            workers, submissions = zip(
                *[processing.get() for _ in range(processing.qsize())])
        except ValueError:
            policy = self.hunger_policy
            if policy == 'exit':
                self._poison_pill = True
            elif policy == 'sleep':
                time.sleep(5)
            return

        for worker, submission in zip(workers, submissions):
            submission_id, submission_name = submission

            elapsed = worker.time_since_last_status_check()
            if elapsed is not None and elapsed < self.time_between_collection:
                # Checked too recently: hand the entry back without
                # looking at the worker status.
                processing.put_nowait(
                    (worker, (submission_id, submission_name)))
                time.sleep(0)
                continue

            if worker.status == 'running':
                processing.put_nowait(
                    (worker, (submission_id, submission_name)))
                # Zero-length sleep; presumably meant to yield to other
                # threads.
                time.sleep(0)
                continue

            if worker.status == 'retry':
                set_submission_state(session, submission_id, 'new')
                self._logger.info(
                    f'Submission: {submission_id} has been interrupted. '
                    'It will be added to queue again and retried.')
                worker.teardown()
                continue

            self._logger.info(f'Collecting results from worker {worker}')
            returncode, stderr = worker.collect_results()

            if not returncode:
                submission_status = 'tested'
            elif returncode == 124:
                self._logger.info(f'Worker {worker} killed due to timeout.')
                submission_status = 'training_error'
            elif returncode == 2:
                # Error occurred when downloading the logs
                submission_status = 'checking_error'
            else:
                self._logger.info(
                    f'Worker {worker} killed due to an error '
                    f'during training: {stderr}')
                submission_status = 'training_error'

            set_submission_state(session, submission_id, submission_status)
            set_submission_error_msg(session, submission_id, stderr)
            self._processed_submission_queue.put_nowait(
                (submission_id, submission_name))
            worker.teardown()
예제 #4
0
    def collect_result(self, session):
        """Drain the processing queue and harvest workers that are done.

        Every ``(worker, (submission_id, submission_name))`` entry currently
        held by ``self._processing_worker_queue`` is pulled out. An entry is
        put back untouched when ``worker.time_since_last_status_check()`` is
        not ``None`` and is below ``self.time_between_collection``, or when
        the worker status is ``'running'``. For every other worker the
        results are collected, the submission state is set to ``'tested'``
        when the return code is falsy and to ``'training_error'`` otherwise
        (return code ``124`` is logged as a timeout), the collected stderr
        is stored as the error message, the submission is pushed onto
        ``self._processed_submission_queue`` and the worker is torn down.

        When nothing could be pulled from the queue, ``self.hunger_policy``
        decides what happens: ``'sleep'`` waits five seconds, ``'exit'``
        sets ``self._poison_pill`` to ``True``; the method then returns.

        Parameters
        ----------
        session :
            Forwarded unchanged to ``set_submission_state`` and
            ``set_submission_error_msg``.
        """
        processing = self._processing_worker_queue
        try:
            # With nothing drained, zip(*[]) yields no items and the two-name
            # unpacking raises ValueError: the "queue was empty" signal.
            workers, submissions = zip(
                *[processing.get() for _ in range(processing.qsize())])
        except ValueError:
            policy = self.hunger_policy
            if policy == 'exit':
                self._poison_pill = True
            elif policy == 'sleep':
                time.sleep(5)
            return

        for worker, submission in zip(workers, submissions):
            submission_id, submission_name = submission

            elapsed = worker.time_since_last_status_check()
            if elapsed is not None and elapsed < self.time_between_collection:
                # Checked too recently: hand the entry back without
                # looking at the worker status.
                processing.put_nowait(
                    (worker, (submission_id, submission_name)))
                time.sleep(0)
                continue

            if worker.status == 'running':
                processing.put_nowait(
                    (worker, (submission_id, submission_name)))
                # Zero-length sleep; presumably meant to yield to other
                # threads.
                time.sleep(0)
                continue

            logger.info(f'Collecting results from worker {worker}')
            returncode, stderr = worker.collect_results()

            if not returncode:
                submission_status = 'tested'
            else:
                if returncode == 124:
                    logger.info(f'Worker {worker} killed due to timeout.')
                else:
                    logger.info(f'Worker {worker} killed due to an error '
                                'during training')
                submission_status = 'training_error'

            set_submission_state(session, submission_id, submission_status)
            set_submission_error_msg(session, submission_id, stderr)
            self._processed_submission_queue.put_nowait(
                (submission_id, submission_name))
            worker.teardown()
예제 #5
0
    def launch_workers(self, session):
        """Start awaiting workers for as long as there is room to run them.

        Entries are taken from ``self._awaiting_worker_queue`` until that
        queue is empty or ``self._processing_worker_queue`` is full. Each
        worker is set up and, unless setup left its status at ``'error'``,
        its submission is launched. Any exception raised while doing so is
        logged and the worker status is forced to ``'error'``.

        A worker ending up in the ``'error'`` status has its submission
        state set to ``'checking_error'``, is torn down, and a user-facing
        explanation is stored as the submission error message. Otherwise
        the submission state becomes ``'training'``, the user leaderboards
        are updated for the submission's team, and the worker is stored in
        the processing queue.

        Parameters
        ----------
        session :
            Forwarded unchanged to ``set_submission_state``,
            ``set_submission_error_msg``, ``get_submission_by_id`` and
            ``update_user_leaderboards``.
        """
        awaiting = self._awaiting_worker_queue
        processing = self._processing_worker_queue
        # Message stored on the submission when the worker could not start.
        failure_notice = ("There was a problem with sending your submission"
                          " for training. This problem is on RAMP side"
                          " and most likely it is not related to your"
                          " code. If this happened for the first time"
                          " to this submission you might"
                          " consider submitting the same code once again."
                          " Else, please contact the event organizers.")

        while not (processing.full() or awaiting.empty()):
            worker, submission = awaiting.get()
            submission_id, submission_name = submission
            self._logger.info(f'Starting worker: {worker}')

            try:
                worker.setup()
                if worker.status != "error":
                    worker.launch_submission()
            except Exception as exc:
                self._logger.error(
                    f'Worker finished with unhandled exception:\n {exc}')
                worker.status = 'error'

            if worker.status == 'error':
                set_submission_state(session, submission_id, 'checking_error')
                worker.teardown()  # kill the worker
                self._logger.info(f'Worker {worker} killed due to an error '
                                  'while connecting to AWS worker')
                set_submission_error_msg(
                    session, submission_id, failure_notice)
            else:
                set_submission_state(session, submission_id, 'training')
                submission_record = get_submission_by_id(
                    session, submission_id)
                event_name = self._ramp_config['event_name']
                team_name = submission_record.team.name
                update_user_leaderboards(
                    session,
                    event_name,
                    team_name,
                    new_only=True,
                )
                processing.put_nowait(
                    (worker, (submission_id, submission_name)))
                self._logger.info(
                    f'Store the worker {worker} into the processing queue')