コード例 #1
0
ファイル: worker.py プロジェクト: neverspill/optomatic
class Worker:
    """Pull queued jobs from a ``JobsDB``, evaluate them and report the results.

    Each job carries a ``'params'`` dict.  The worker hands those parameters
    to ``objective`` together with the estimator and the data, then reports
    the mean and standard deviation of the returned scores back to the
    jobs database.
    """

    def __init__(self, project_name, experiment_name, clf, X, y, objective,
                 host='localhost', port=27017, check_every=1, loop_forever=True):
        """Connect to the jobs database and store the evaluation inputs.

        Args:
            project_name: forwarded to ``JobsDB``.
            experiment_name: forwarded to ``JobsDB``.
            clf: estimator handed to ``objective``.
            X: features handed to ``objective``.
            y: labels handed to ``objective``.
            objective: callable invoked as ``objective(clf, params, X, y)``;
                its return value is reduced with ``np.mean`` / ``np.std``.
            host: forwarded to ``JobsDB``.
            port: forwarded to ``JobsDB``.
            check_every: delay in seconds between checks for new jobs.
            loop_forever: when False, the number of jobs queued at
                construction time is recorded and ``start_worker`` stops
                after computing that many jobs.
        """
        self.jobsDB = JobsDB(project_name, experiment_name, host, port)

        self.n_trials = -1  # -1 means: loop forever
        if not loop_forever:
            # We're probably running in the multi-experiment mode, so once
            # we've computed everything in this experiment we will exit to
            # let the next one run.
            # NOTE(review): relies on get_queued_jobs() returning an object
            # with a .count() method; PyMongo 4 removed Cursor.count() --
            # confirm against the JobsDB implementation.
            self.n_trials = self.jobsDB.get_queued_jobs().count()

        self.clf = clf  # estimator
        self.X = X  # X features
        self.y = y  # y labels
        self.objective = objective
        self.check_every = check_every  # delay in seconds between checking for jobs

    def start_worker(self):
        """Run the compute loop.

        With a non-negative ``n_trials`` exactly that many jobs are computed
        and the job statistics are printed afterwards; otherwise the worker
        keeps computing jobs indefinitely.
        """
        if self.n_trials > -1:
            # Use the module logger, consistent with the other methods,
            # rather than the root logger.
            logger.info('Worker will close after the {} jobs in this experiment.'.format(self.n_trials))
            for _ in range(self.n_trials):
                self.compute()
            # print some stats
            self.jobsDB.print_job_stats()
        else:
            while True:
                self.compute()

    def get_next_params(self):
        """Block until the queue yields a job and return it.

        While the queue returns ``None`` the worker sleeps ``check_every``
        seconds between attempts.
        """
        while True:
            job = self.jobsDB.get_next_job_from_queue()
            if job is not None:
                logger.info(job)
                return job
            logger.info('No queued job. Waiting {}s for new jobs...'.format(self.check_every))
            time.sleep(self.check_every)

    @staticmethod
    def _coerce_text_params(params):
        """Convert Python 2 ``unicode`` values in ``params`` to ``str`` in place.

        On Python 3 the name ``unicode`` does not exist and ``str`` already
        is the text type, so the dict is returned untouched.
        """
        try:
            text_type = unicode  # noqa: F821 -- only defined on Python 2
        except NameError:
            return params
        for name in list(params):
            if isinstance(params[name], text_type):
                params[name] = str(params[name])
        return params

    def compute(self):
        """Fetch one job, evaluate the objective and report the result."""
        job = self.get_next_params()
        clf_params = self._coerce_text_params(job['params'])

        scores = self.objective(self.clf, clf_params, self.X, self.y)
        logger.debug("scores from objective: {}".format(scores))

        # Cast to built-in floats so the reported values are plain Python
        # numbers regardless of the dtype of `scores`.
        loss = float(np.mean(scores))
        std = float(np.std(scores))

        # then report these results back in the db...
        aux_data = {'loss': loss, 'std': std}
        self.jobsDB.report_job_completion(job['_id'], loss, aux_data=aux_data)