Example #1
0
 def collect_result(self, session):
     """Collect the outcome of every worker in the processing queue.

     The processing queue is drained completely. A worker whose status is
     still ``'running'`` is put back on the queue. For any other worker the
     results are collected, the submission state (``'tested'`` on a falsy
     return code, ``'training_error'`` otherwise) and the error message are
     stored through ``session``, the submission is pushed on the processed
     queue and the worker is torn down.

     When nothing was waiting, the hunger policy decides what happens:
     ``'sleep'`` pauses for 5 seconds and ``'exit'`` sets the poison pill.

     Parameters
     ----------
     session : object
         Database session forwarded to the submission-state helpers.
     """
     processing_queue = self._processing_worker_queue
     drained = [processing_queue.get()
                for _ in range(processing_queue.qsize())]
     try:
         # transposing an empty list leaves nothing to unpack -> ValueError
         workers, submissions = zip(*drained)
     except ValueError:
         logger.info('No workers are currently waiting or processed.')
         policy = self.hunger_policy
         if policy == 'sleep':
             time.sleep(5)
         elif policy == 'exit':
             self._poison_pill = True
         return

     for worker, submission in zip(workers, submissions):
         submission_id, submission_name = submission
         if worker.status == 'running':
             # not finished yet: check this worker again on the next pass
             processing_queue.put_nowait(
                 (worker, (submission_id, submission_name)))
             logger.info('Worker {} is still running'.format(worker))
             time.sleep(0)
             continue

         logger.info('Collecting results from worker {}'.format(worker))
         returncode, stderr = worker.collect_results()
         new_state = 'training_error' if returncode else 'tested'
         set_submission_state(session, submission_id, new_state)
         set_submission_error_msg(session, submission_id, stderr)
         self._processed_submission_queue.put_nowait(
             (submission_id, submission_name))
         worker.teardown()
Example #2
0
 def update_database_results(self, session):
     """Store the results of the processed submissions in the database.

     Each entry of the processed-submission queue is handled in turn. A
     submission whose state contains ``'error'`` is skipped; otherwise its
     predictions, timings, scores and bagged scores are loaded from the
     predictions directory and the state becomes ``'scored'``. In both
     cases the event and user leaderboards are refreshed for every entry.

     Parameters
     ----------
     session : object
         Database session forwarded to the database helpers.
     """
     processed_queue = self._processed_submission_queue
     while not processed_queue.empty():
         submission_id, submission_name = processed_queue.get_nowait()
         failed = 'error' in get_submission_state(session, submission_id)
         if not failed:
             logger.info('Update the results obtained on each fold for '
                         '{}'.format(submission_name))
             path_predictions = os.path.join(
                 self._worker_config['predictions_dir'], submission_name)
             for store in (set_predictions, set_time, set_scores,
                           set_bagged_scores):
                 store(session, submission_id, path_predictions)
             set_submission_state(session, submission_id, 'scored')

         # leaderboards are refreshed whether or not the submission failed
         event_name = self._ramp_config['event_name']
         update_leaderboards(session, event_name)
         update_all_user_leaderboards(session, event_name)
         if failed:
             logger.info('Skip update for {} due to failure during the '
                         'processing'.format(submission_name))
Example #3
0
    def update_database_results(self, session):
        """Store the results of the processed submissions in the database.

        Every entry of the processed-submission queue is consumed.
        Submissions whose state contains ``'error'`` are skipped; for the
        others the timings, scores and bagged scores are read from the
        predictions directory and the state becomes ``'scored'``. The
        leaderboards are refreshed once, at the end, and only if at least
        one entry was taken from the queue.

        Parameters
        ----------
        session : object
            Database session forwarded to the database helpers.
        """
        processed_queue = self._processed_submission_queue
        consumed_any = False
        while not processed_queue.empty():
            consumed_any = True
            submission_id, submission_name = processed_queue.get_nowait()
            if 'error' in get_submission_state(session, submission_id):
                continue
            logger.info('Write info in database for submission {}'.format(
                submission_name))
            path_predictions = os.path.join(
                self._worker_config['predictions_dir'], submission_name)
            # NOTE: predictions used to be stored in the database as well;
            # they take too much space, so they now stay on disk only.
            for store in (set_time, set_scores, set_bagged_scores):
                store(session, submission_id, path_predictions)
            set_submission_state(session, submission_id, 'scored')

        if not consumed_any:
            return

        logger.info('Update all leaderboards')
        event_name = self._ramp_config['event_name']
        update_leaderboards(session, event_name)
        update_all_user_leaderboards(session, event_name)
Example #4
0
 def fetch_from_db(self, session):
     """Queue a worker for every new, non-sandbox submission of the event.

     Submissions in state ``'new'`` are read from the database. Sandbox
     submissions are ignored. For each remaining one a worker is created,
     the state is set to ``'sent_to_training'``, the leaderboard of the
     submitting team is refreshed and the worker is put on the awaiting
     queue.

     Parameters
     ----------
     session : object
         Database session forwarded to the database helpers.
     """
     event_name = self._ramp_config['event_name']
     new_submissions = get_submissions(session, event_name, state='new')
     # an empty (falsy) result simply yields no iteration
     for submission_id, submission_name, _ in new_submissions or ():
         submission = get_submission_by_id(session, submission_id)
         if not submission.is_not_sandbox:
             # the sandbox submission is never trained
             continue
         worker = self.worker(self._worker_config, submission_name)
         set_submission_state(session, submission_id, 'sent_to_training')
         update_user_leaderboards(
             session, event_name, submission.team.name, new_only=True)
         self._awaiting_worker_queue.put_nowait(
             (worker, (submission_id, submission_name)))
         logger.info('Submission {} added to the queue of submission to be '
                     'processed'.format(submission_name))
Example #5
0
    def collect_result(self, session):
        """Collect the outcome of every worker in the processing queue.

        The processing queue is drained completely and each worker is
        handled according to its situation:

        * its status was checked less than ``time_between_collection`` ago,
          or it is still ``'running'``: it is put back on the queue;
        * its status is ``'retry'``: the submission goes back to ``'new'``
          and the worker is torn down;
        * otherwise: the results are collected, the submission state is
          derived from the return code, the error message is stored, the
          submission is pushed on the processed queue and the worker is
          torn down.

        When nothing was waiting, the hunger policy decides what happens:
        ``'sleep'`` pauses for 5 seconds and ``'exit'`` sets the poison
        pill.

        Parameters
        ----------
        session : object
            Database session forwarded to the submission-state helpers.
        """
        processing_queue = self._processing_worker_queue
        drained = [processing_queue.get()
                   for _ in range(processing_queue.qsize())]
        try:
            # transposing an empty list leaves nothing to unpack
            workers, submissions = zip(*drained)
        except ValueError:
            policy = self.hunger_policy
            if policy == 'sleep':
                time.sleep(5)
            elif policy == 'exit':
                self._poison_pill = True
            return

        for worker, submission in zip(workers, submissions):
            submission_id, submission_name = submission
            elapsed = worker.time_since_last_status_check()
            checked_recently = (elapsed is not None
                                and elapsed < self.time_between_collection)
            if checked_recently or worker.status == 'running':
                # too early to poll again, or not finished: look at it later
                processing_queue.put_nowait(
                    (worker, (submission_id, submission_name)))
                time.sleep(0)
                continue

            if worker.status == 'retry':
                set_submission_state(session, submission_id, 'new')
                self._logger.info(
                    f'Submission: {submission_id} has been interrupted. '
                    'It will be added to queue again and retried.')
                worker.teardown()
                continue

            self._logger.info(f'Collecting results from worker {worker}')
            returncode, stderr = worker.collect_results()

            if not returncode:
                submission_status = 'tested'
            elif returncode == 124:
                self._logger.info(
                    f'Worker {worker} killed due to timeout.')
                submission_status = 'training_error'
            elif returncode == 2:
                # error occurred when downloading the logs
                submission_status = 'checking_error'
            else:
                self._logger.info(
                    f'Worker {worker} killed due to an error '
                    f'during training: {stderr}')
                submission_status = 'training_error'

            set_submission_state(session, submission_id, submission_status)
            set_submission_error_msg(session, submission_id, stderr)
            self._processed_submission_queue.put_nowait(
                (submission_id, submission_name))
            worker.teardown()
Example #6
0
 def launch_workers(self, session):
     """Start awaiting workers while the processing queue has room.

     Each worker taken from the awaiting queue is set up and launched, its
     submission is marked ``'training'`` and the worker is moved to the
     processing queue. A message is logged when the processing queue ends
     up full.

     Parameters
     ----------
     session : object
         Database session forwarded to ``set_submission_state``.
     """
     processing_queue = self._processing_worker_queue
     awaiting_queue = self._awaiting_worker_queue
     while not (processing_queue.full() or awaiting_queue.empty()):
         worker, submission = awaiting_queue.get()
         submission_id, submission_name = submission
         logger.info('Starting worker: {}'.format(worker))
         worker.setup()
         worker.launch_submission()
         set_submission_state(session, submission_id, 'training')
         processing_queue.put_nowait(
             (worker, (submission_id, submission_name)))
         logger.info(
             'Store the worker {} into the processing queue'.format(worker))

     if processing_queue.full():
         logger.info('The processing queue is full. Waiting for a worker to'
                     ' finish')
Example #7
0
    def collect_result(self, session):
        """Collect the outcome of every worker in the processing queue.

        The processing queue is drained completely. A worker whose status
        was checked less than ``time_between_collection`` ago, or which is
        still ``'running'``, is put back on the queue. For any other worker
        the results are collected, the submission state (``'tested'`` on a
        falsy return code, ``'training_error'`` otherwise) and the error
        message are stored, the submission is pushed on the processed queue
        and the worker is torn down.

        When nothing was waiting, the hunger policy decides what happens:
        ``'sleep'`` pauses for 5 seconds and ``'exit'`` sets the poison
        pill.

        Parameters
        ----------
        session : object
            Database session forwarded to the submission-state helpers.
        """
        processing_queue = self._processing_worker_queue
        drained = [processing_queue.get()
                   for _ in range(processing_queue.qsize())]
        try:
            # transposing an empty list leaves nothing to unpack
            workers, submissions = zip(*drained)
        except ValueError:
            policy = self.hunger_policy
            if policy == 'sleep':
                time.sleep(5)
            elif policy == 'exit':
                self._poison_pill = True
            return

        for worker, submission in zip(workers, submissions):
            submission_id, submission_name = submission
            elapsed = worker.time_since_last_status_check()
            checked_recently = (elapsed is not None
                                and elapsed < self.time_between_collection)
            if checked_recently or worker.status == 'running':
                # too early to poll again, or not finished: look at it later
                processing_queue.put_nowait(
                    (worker, (submission_id, submission_name)))
                time.sleep(0)
                continue

            logger.info(f'Collecting results from worker {worker}')
            returncode, stderr = worker.collect_results()
            if not returncode:
                submission_status = 'tested'
            else:
                if returncode == 124:
                    logger.info(
                        'Worker {} killed due to timeout.'.format(worker))
                else:
                    logger.info(f'Worker {worker} killed due to an error '
                                'during training')
                submission_status = 'training_error'

            set_submission_state(session, submission_id, submission_status)
            set_submission_error_msg(session, submission_id, stderr)
            self._processed_submission_queue.put_nowait(
                (submission_id, submission_name))
            worker.teardown()
Example #8
0
    def launch_workers(self, session):
        """Start awaiting workers while the processing queue has room.

        Each worker taken from the awaiting queue is set up and, unless the
        setup left it in ``'error'`` status, launched. Any exception raised
        while doing so is logged and turns the worker status into
        ``'error'``.

        A worker in ``'error'`` status has its submission marked
        ``'checking_error'`` with an explanatory message and is torn down.
        Otherwise the submission is marked ``'training'``, the leaderboard
        of the submitting team is refreshed and the worker is moved to the
        processing queue.

        Parameters
        ----------
        session : object
            Database session forwarded to the database helpers.
        """
        processing_queue = self._processing_worker_queue
        awaiting_queue = self._awaiting_worker_queue
        while not (processing_queue.full() or awaiting_queue.empty()):
            worker, submission = awaiting_queue.get()
            submission_id, submission_name = submission
            self._logger.info(f'Starting worker: {worker}')

            try:
                worker.setup()
                setup_failed = worker.status == "error"
                if not setup_failed:
                    worker.launch_submission()
            except Exception as exc:
                self._logger.error(
                    f'Worker finished with unhandled exception:\n {exc}')
                worker.status = 'error'

            if worker.status != 'error':
                set_submission_state(session, submission_id, 'training')
                submission = get_submission_by_id(session, submission_id)
                update_user_leaderboards(
                    session,
                    self._ramp_config['event_name'],
                    submission.team.name,
                    new_only=True,
                )
                processing_queue.put_nowait(
                    (worker, (submission_id, submission_name)))
                self._logger.info(
                    f'Store the worker {worker} into the processing queue')
                continue

            # the worker could not be started: flag the submission and
            # release the worker
            set_submission_state(session, submission_id, 'checking_error')
            worker.teardown()
            self._logger.info(f'Worker {worker} killed due to an error '
                              f'while connecting to AWS worker')
            stderr = ("There was a problem with sending your submission"
                      " for training. This problem is on RAMP side"
                      " and most likely it is not related to your"
                      " code. If this happened for the first time"
                      " to this submission you might"
                      " consider submitting the same code once again."
                      " Else, please contact the event organizers.")
            set_submission_error_msg(session, submission_id, stderr)
Example #9
0
def test_score_submission(session_scope_module):
    """Check the scores stored by ``score_submission`` for submission 9.

    Scoring must raise while the submission is not in the "tested" state;
    once the state and the predictions are set, the stored scores are
    compared with the expected per-fold / per-step values.
    """
    session = session_scope_module
    submission_id = 9
    fold_step_index = pd.MultiIndex.from_product(
        [[0, 1], ['train', 'valid', 'test']], names=['fold', 'step']
    )
    expected_scores = {
        'acc': [0.604167, 0.583333, 0.733333, 0.604167, 0.583333, 0.733333],
        'error': [0.395833, 0.416667, 0.266667, 0.395833, 0.416667, 0.266667],
        'nll': [0.732763, 2.194549, 0.693464, 0.746132, 2.030762, 0.693992],
        'f1_70': [0.333333, 0.33333, 0.666667, 0.33333, 0.33333, 0.666667],
    }
    expected_df = pd.DataFrame(expected_scores, index=fold_step_index)
    predictions_dir = os.path.join(HERE, 'data', 'iris_predictions')

    # scoring is refused until the submission is marked as "tested"
    with pytest.raises(ValueError, match='Submission state must be "tested"'):
        score_submission(session, submission_id)

    set_submission_state(session, submission_id, 'tested')
    set_predictions(session, submission_id, predictions_dir)
    score_submission(session, submission_id)

    stored_scores = get_scores(session, submission_id)
    assert_frame_equal(stored_scores, expected_df, check_less_precise=True)
Example #10
0
 def launch_workers(self, session):
     """Start awaiting workers while the processing queue has room.

     Each worker taken from the awaiting queue is set up and then launched.
     If the worker status is ``'error'`` after either step, the submission
     is marked ``'checking_error'`` and the worker is dropped. Otherwise
     the submission is marked ``'training'``, the leaderboard of the
     submitting team is refreshed and the worker is moved to the processing
     queue.

     Parameters
     ----------
     session : object
         Database session forwarded to the database helpers.
     """
     processing_queue = self._processing_worker_queue
     awaiting_queue = self._awaiting_worker_queue
     while not (processing_queue.full() or awaiting_queue.empty()):
         worker, submission = awaiting_queue.get()
         submission_id, submission_name = submission
         logger.info('Starting worker: {}'.format(worker))

         worker.setup()
         in_error = worker.status == 'error'
         if not in_error:
             # only launch when the setup went through
             worker.launch_submission()
             in_error = worker.status == 'error'
         if in_error:
             set_submission_state(session, submission_id, 'checking_error')
             continue

         set_submission_state(session, submission_id, 'training')
         submission = get_submission_by_id(session, submission_id)
         update_user_leaderboards(
             session,
             self._ramp_config['event_name'],
             submission.team.name,
             new_only=True,
         )
         processing_queue.put_nowait(
             (worker, (submission_id, submission_name)))
         logger.info(
             'Store the worker {} into the processing queue'.format(worker))
Example #11
0
 def launch(self):
     """Launch the dispatcher.

     A database session is opened and, until the poison pill is set, the
     dispatcher repeatedly fetches new submissions, launches workers,
     collects their results and updates the database.

     Whether the loop stops normally or through an exception, every
     submission of the event left in the ``'training'`` or
     ``'sent_to_training'`` state is put back to ``'new'`` so that it is
     picked up again on the next run.
     """
     logger.info('Starting the RAMP dispatcher')
     with session_scope(self._database_config) as session:
         logger.info('Open a session to the database')
         try:
             while not self._poison_pill:
                 self.fetch_from_db(session)
                 self.launch_workers(session)
                 self.collect_result(session)
                 self.update_database_results(session)
         finally:
             # reset the submissions to 'new' in case of error or unfinished
             # training
             submissions = get_submissions(session,
                                           self._ramp_config['event_name'],
                                           state=None)
             for submission_id, _, _ in submissions:
                 submission_state = get_submission_state(
                     session, submission_id)
                 # 'sent_to_training' is the state name used when a
                 # submission is queued for training
                 if submission_state in ('training', 'sent_to_training'):
                     set_submission_state(session, submission_id, 'new')
     logger.info('Dispatcher killed by the poison pill')
Example #12
0
    def update_database_results(self, session):
        """Store the results of the processed submissions in the database.

        Every entry of the processed-submission queue is consumed.
        Submissions whose state contains ``'error'`` are skipped; for the
        others the predictions, timings, scores and bagged scores are read
        from the predictions directory and the state becomes ``'scored'``.
        The leaderboards are refreshed once, at the end, and only if at
        least one entry was taken from the queue.

        Parameters
        ----------
        session : object
            Database session forwarded to the database helpers.
        """
        processed_queue = self._processed_submission_queue
        consumed_any = False
        while not processed_queue.empty():
            consumed_any = True
            submission_id, submission_name = processed_queue.get_nowait()
            if 'error' in get_submission_state(session, submission_id):
                continue
            logger.info('Write info in data base for submission {}'.format(
                submission_name))
            path_predictions = os.path.join(
                self._worker_config['predictions_dir'], submission_name)
            for store in (set_predictions, set_time, set_scores,
                          set_bagged_scores):
                store(session, submission_id, path_predictions)
            set_submission_state(session, submission_id, 'scored')

        if not consumed_any:
            return

        logger.info('Update all leaderboards')
        event_name = self._ramp_config['event_name']
        update_leaderboards(session, event_name)
        update_all_user_leaderboards(session, event_name)
Example #13
0
def test_make_submission_resubmission(base_db):
    """Check when a submission name may or may not be submitted again.

    Resubmitting under an existing name raises ``DuplicateSubmissionError``
    for the sandbox submission and for a scored submission; it is accepted
    when the earlier submission failed during training or is still new.
    """
    session = base_db
    config = ramp_config_template()
    event_name, username = _setup_sign_up(session)
    ramp_config = generate_ramp_config(read_config(config))
    sandbox_dir = ramp_config['ramp_sandbox_dir']
    duplicate_msg = ('Submission "{}" of team "test_user" at event '
                     '"iris_test" exists already')

    # the starting kit is already used as the default sandbox submission,
    # so submitting it again has to be rejected
    with pytest.raises(DuplicateSubmissionError,
                       match=duplicate_msg.format('starting_kit')):
        add_submission(session, event_name, username,
                       os.path.basename(sandbox_dir), sandbox_dir)

    # a regular submission cannot be submitted twice either
    submission_name = 'random_forest_10_10'
    path_submission = os.path.join(
        os.path.dirname(sandbox_dir), submission_name)

    def submit():
        add_submission(session, event_name, username, submission_name,
                       path_submission)

    # first submission
    submit()
    # pretend the submission has been scored
    set_submission_state(session, 5, 'scored')
    # second submission
    with pytest.raises(DuplicateSubmissionError,
                       match=duplicate_msg.format(submission_name)):
        submit()

    # resubmitting is allowed once the submission failed or is still "new"

    # pretend the submission failed during the training
    set_submission_state(session, 5, 'training_error')
    submit()
    # pretend the submissions are new submissions
    set_submission_state(session, 5, 'new')
    submit()
Example #14
0
 def _reset_submission_after_failure(session, even_name):
     """Put interrupted submissions of an event back to the ``'new'`` state.

     Every submission of the event whose state is ``'training'`` or
     ``'sent_to_training'`` is reset to ``'new'``.

     Parameters
     ----------
     session : object
         Database session forwarded to the database helpers.
     even_name : str
         Name of the event whose submissions are inspected.
     """
     submissions = get_submissions(session, even_name, state=None)
     for submission_id, _, _ in submissions:
         submission_state = get_submission_state(session, submission_id)
         # 'sent_to_training' is the state name used when a submission is
         # queued for training
         if submission_state in ('training', 'sent_to_training'):
             set_submission_state(session, submission_id, 'new')
Example #15
0
def test_set_submission_state_unknown_state(session_scope_module):
    """Setting a state that is not recognised raises ``UnknownStateError``."""
    submission_id, bogus_state = 2, 'unknown'
    expect_unknown_state = pytest.raises(
        UnknownStateError, match='Unrecognized state')
    with expect_unknown_state:
        set_submission_state(session_scope_module, submission_id, bogus_state)
Example #16
0
def test_set_submission_state(session_scope_module):
    """A state written with ``set_submission_state`` can be read back."""
    session, submission_id, new_state = session_scope_module, 2, 'trained'
    set_submission_state(session, submission_id, new_state)
    assert get_submission_state(session, submission_id) == new_state