def test_suggest_1(self):
    """Suggest one trial for id 0 and check its 'node_4' index bookkeeping."""
    suggested = self.algo.suggest([0], Trials())
    assert len(suggested) == 1

    # -- assert validity of docs
    built = trials_from_docs(suggested)
    first_misc = suggested[0]['misc']
    assert first_misc['idxs']['node_4'] == [0]

    # -- the same index must come back out of the aggregated miscs
    idxs, _vals = miscs_to_idxs_vals(built.miscs)
    assert idxs['node_4'] == [0]
def test_seeding(self):
    """Pin the values drawn by rand.suggest on coin_flip with seed=123."""
    # -- assert that the seeding works a particular way
    domain = coin_flip()
    new_ids = list(range(10))
    docs = rand.suggest(new_ids, domain, Trials(), seed=123)

    idxs, vals = miscs_to_idxs_vals(trials_from_docs(docs).miscs)

    # Passes Nov 8 / 2013
    flip_idxs = list(idxs["flip"])
    flip_vals = list(vals["flip"])
    self.assertEqual(flip_idxs, list(range(10)))
    self.assertEqual(flip_vals, [0, 1, 0, 0, 0, 0, 0, 1, 1, 0])
def test_suggest_1(self):
    """Suggest one trial for id 0 and check its 'flip' index bookkeeping.

    The debug output is kept, but written as single-argument ``print(...)``
    calls so the text is the same on Python 2 and Python 3.
    """
    print('EXPR %s' % (self.bandit.expr,))
    docs = self.algo.suggest([0], Trials())
    assert len(docs) == 1
    print('DOCS %s' % (docs,))

    # -- assert validity of docs
    trials = trials_from_docs(docs)
    print('TRIALS %s' % (trials,))
    assert docs[0]['misc']['idxs']['flip'] == [0]

    idxs, vals = miscs_to_idxs_vals(trials.miscs)
    assert idxs['flip'] == [0]
def test_seeding(self):
    """Pin the values drawn by rand.suggest on coin_flip with seed=123.

    ``range(10)`` is materialised with ``list(...)`` wherever it is compared
    or passed on: on Python 3 a list never compares equal to a range object,
    so the original assertion could not pass there.
    """
    # -- assert that the seeding works a particular way
    domain = coin_flip()
    docs = rand.suggest(list(range(10)), domain, Trials(), seed=123)
    trials = trials_from_docs(docs)
    idxs, vals = miscs_to_idxs_vals(trials.miscs)

    # Passes Nov 8 / 2013
    self.assertEqual(list(idxs['flip']), list(range(10)))
    self.assertEqual(list(vals['flip']), [0, 1, 0, 0, 0, 0, 0, 1, 1, 0])
def test_arbitrary_range(self):
    """Check that suggest() accepts non-contiguous, mixed-type trial ids.

    The ids are a mix of ints and strings; the test asserts that they come
    back unchanged as the 'node_4' indexes and pins the drawn values.
    """
    new_ids = [-2, 0, 7, 'a', '007']
    docs = self.algo.suggest(new_ids, Trials())

    # -- assert validity of docs
    trials = trials_from_docs(docs)
    idxs, vals = miscs_to_idxs_vals(trials.miscs)
    assert len(docs) == 5
    assert len(idxs) == 1
    assert len(vals) == 1

    # -- print(...) with one argument prints the same on Python 2 and 3
    print(vals)
    assert idxs['node_4'] == new_ids
    assert np.all(vals['node_4'] == [0, 1, 0, 1, 1])
def test_arbitrary_range(self):
    """Check that suggest() accepts non-contiguous, mixed-type trial ids.

    The ids are a mix of ints and strings; the test asserts that they come
    back unchanged as the 'flip' indexes and pins the drawn values.
    """
    new_ids = [-2, 0, 7, 'a', '007']
    docs = self.algo.suggest(new_ids, Trials())

    # -- assert validity of docs
    trials = trials_from_docs(docs)
    idxs, vals = miscs_to_idxs_vals(trials.miscs)
    assert len(docs) == 5
    assert len(idxs) == 1
    assert len(vals) == 1

    # -- print(...) with one argument prints the same on Python 2 and 3
    print(vals)
    assert idxs['flip'] == new_ids
    assert np.all(vals['flip'] == [0, 1, 0, 1, 1])
def test_suggest_5(self):
    """Suggest five trials (ids 0..4) and pin the 'node_4' indexes/values.

    ``range(5)`` is wrapped in ``list(...)`` because a list never compares
    equal to a range object on Python 3; on Python 2 the wrapping is a no-op.
    """
    new_ids = list(range(5))
    docs = self.algo.suggest(new_ids, Trials())
    print(docs)
    assert len(docs) == 5

    # -- assert validity of docs
    trials = trials_from_docs(docs)
    idxs, vals = miscs_to_idxs_vals(trials.miscs)
    print(idxs)
    print(vals)
    assert len(idxs) == 1
    assert len(vals) == 1
    assert idxs['node_4'] == list(range(5))
    assert np.all(vals['node_4'] == [1, 1, 0, 1, 0])
def test_suggest_5(self):
    """Suggest five trials (ids 0..4) and pin the 'flip' indexes/values.

    ``range(5)`` is wrapped in ``list(...)`` because a list never compares
    equal to a range object on Python 3; on Python 2 the wrapping is a no-op.
    """
    new_ids = list(range(5))
    docs = self.algo.suggest(new_ids, Trials())
    print(docs)
    assert len(docs) == 5

    # -- assert validity of docs
    trials = trials_from_docs(docs)
    idxs, vals = miscs_to_idxs_vals(trials.miscs)
    print(idxs)
    print(vals)
    assert len(idxs) == 1
    assert len(vals) == 1
    assert idxs['flip'] == list(range(5))
    assert np.all(vals['flip'] == [1, 1, 0, 1, 0])
def test_arbitrary_range(self, N=10):
    """Check suggest() on the first N of ten mixed-type, unordered ids.

    Parameters
    ----------
    N - int
        How many of the ten predefined ids to use; must be <= 10.
    """
    assert N <= 10
    new_ids = [-2, 0, 7, 'a', '007', 66, 'a3', '899', 23, 2333][:N]
    docs = self.algo.suggest(new_ids, Trials())

    # -- assert validity of docs
    trials = trials_from_docs(docs)
    idxs, vals = miscs_to_idxs_vals(trials.miscs)
    assert len(docs) == N
    assert len(idxs) == 1
    assert len(vals) == 1

    # -- print(...) with one argument prints the same on Python 2 and 3
    print(vals)
    assert idxs['flip'] == new_ids

    # -- assert that the random seed matches that of Jan 8/2013
    assert np.all(vals['flip'] == [0, 1, 0, 0, 0, 0, 0, 1, 1, 0][:N])
def test_suggest_N(self, N=10):
    """Suggest N trials (ids 0..N-1) and pin the 'flip' indexes/values.

    Parameters
    ----------
    N - int
        Number of trials to suggest; must be <= 10 because only ten
        expected values are listed.
    """
    assert N <= 10
    new_ids = list(range(N))
    docs = self.algo.suggest(new_ids, Trials())
    # -- '%s' formatting keeps the output identical on Python 2 and 3
    print('docs %s' % (docs,))
    assert len(docs) == N

    # -- assert validity of docs
    trials = trials_from_docs(docs)
    idxs, vals = miscs_to_idxs_vals(trials.miscs)
    print('idxs %s' % (idxs,))
    print('vals %s' % (vals,))
    assert len(idxs) == 1
    assert len(vals) == 1
    # -- list(...) is required on Python 3, where list != range
    assert idxs['flip'] == list(range(N))

    # -- NOTE(review): the original comment here read "only works when
    #    N == 5", yet ten expected values are listed and sliced to N;
    #    the comment looks stale -- confirm.
    assert np.all(vals['flip'] == [0, 1, 0, 0, 0, 0, 0, 1, 1, 0][:N])
def suggest(self, new_ids, trials):
    """Suggest new trials for the current round of synchronous boosting.

    The ok trials are grouped into rounds (via ``BoostHelper.round_of``).
    Once the latest round has at least ``self.round_len`` completed trials
    (results carrying a 'loss'), a new round is opened that continues the
    trial with the lowest ``self.bandit.loss`` in that latest round;
    otherwise the latest round is extended with the decisions it already
    uses.  The actual suggestion is delegated to ``self.sub_algo.suggest``
    restricted to the trials of the chosen round.

    Parameters
    ----------
    new_ids - trial ids to hand to the sub-algorithm
    trials - the Trials object holding the experiment history

    Returns
    -------
    The trial documents returned by the sub-algorithm, each patched with
    ``spec['decisions']`` and a ``misc['boosting']`` sub-document
    (keys 'variant', 'round', 'continues').
    """
    round_len = self.round_len
    helper = BoostHelper(trials.trials)
    specs, results, miscs = filter_ok_trials(trials)

    if miscs:
        # -- round of every ok trial, computed once and reused below
        rounds = [helper.round_of(m) for m in miscs]
        # -- actually the rounds of completed trials
        complete_rounds = [rnd for rnd, r in zip(rounds, results)
                           if 'loss' in r]
        max_round = max(rounds)
        urounds = np.unique(rounds)
        urounds.sort()
        # -- rounds must be contiguous from 0; list(range(...)) because on
        #    Python 3 a list never compares equal to a range object
        assert list(urounds) == list(range(max_round + 1))

        rounds_counts = [rounds.count(j) for j in urounds]
        complete_rounds_counts = [complete_rounds.count(j) for j in urounds]
        # -- every round but the last must be fully complete and full-size
        assert all(rc == crc >= round_len
                   for rc, crc in zip(rounds_counts[:-1],
                                      complete_rounds_counts[:-1]))

        # -- all trials of one round must share the same decisions
        round_decs = [[s['decisions']
                       for rnd, s in zip(rounds, specs) if rnd == j]
                      for j in urounds]
        assert all(all(_rd == rd[0] for _rd in rd) for rd in round_decs)
        round_decs = [rd[0] for rd in round_decs]

        if complete_rounds_counts[-1] >= round_len:
            # -- last round is done: open a new one from its best trial
            my_round = max_round + 1
            in_last = [rnd == max_round for rnd in rounds]
            last_specs = [s for s, keep in zip(specs, in_last) if keep]
            last_results = [r for r, keep in zip(results, in_last) if keep]
            last_miscs = [m for m, keep in zip(miscs, in_last) if keep]
            # -- build a real list: on Python 3 map() returns an iterator,
            #    which np.array would wrap as a 0-d object array
            losses = np.array([self.bandit.loss(r, s)
                               for r, s in zip(last_results, last_specs)])
            last_best = losses.argmin()
            decisions = last_results[last_best]['decisions']
            decisions_src = last_miscs[last_best]['tid']
        else:
            # -- keep filling the current round with its existing decisions
            my_round = max_round
            decisions = round_decs[-1]
            decisions_src = helper.continues(miscs[-1])
            if decisions_src is not None:
                decisions_src = decisions_src['tid']
    else:
        # -- no ok trials yet: first round, nothing to continue
        decisions = None
        my_round = 0
        decisions_src = None

    selected_trial_docs = [t for t in trials
                           if helper.round_of(t) == my_round]
    selected_trials = trials_from_docs(selected_trial_docs,
                                       exp_key=trials._exp_key)
    new_trial_docs = self.sub_algo.suggest(new_ids, selected_trials)

    for trial in new_trial_docs:
        # -- patch in decisions of the best current model from previous
        #    round
        spec = trial['spec']
        assert spec['decisions'] is None
        spec['decisions'] = decisions
        misc = trial['misc']
        misc['boosting'] = {
            'variant': 'sync',
            'round': my_round,
            'continues': decisions_src}
    return new_trial_docs
def suggest(self, new_ids, trials):
    """Suggest one new trial for synchronous boosting with a look-back window.

    Trials whose bandit status is STATUS_OK are grouped into rounds.  The
    new trial belongs to the first round that is not yet ``self.round_len``
    trials long, and it continues the lowest-loss trial found among the
    previous ``self.look_back`` rounds (if any).

    Parameters
    ----------
    new_ids - sequence of trial ids; more than one raises NotImplementedError
    trials - the Trials object holding the experiment history

    Returns
    -------
    The trial documents returned by ``self.sub_algo.suggest``, each patched
    with ``spec['decisions']`` and a ``misc['boosting']`` sub-document
    (keys 'variant', 'round', 'continues').

    Raises
    ------
    NotImplementedError if more than one id is requested.
    """
    if len(new_ids) > 1:
        raise NotImplementedError()

    STATUS_OK = hyperopt.STATUS_OK
    docs = [d for d in trials
            if self.bandit.status(d['result'], d['spec']) == STATUS_OK]

    # -- This suggest() implementation requires that there are no dangling
    #    from_tid pointers, so this loop strips them out of the docs list.
    #    Removing one doc can orphan another, hence iterate to a fixed point.
    while True:
        tids = {d['tid'] for d in docs}
        docs_ = [d for d in docs
                 if d['misc'].get('from_tid', d['tid']) in tids]
        if len(docs_) == len(docs):
            break
        docs = docs_

    helper = BoostHelper(docs)
    round_of = helper.round_of
    round_len = self.round_len
    look_back = self.look_back

    my_round = 0
    cont_decisions = None
    cont_tid = None
    continuing_trials_docs = []

    if docs:
        # -- pick a trial to continue
        # -- a real list is needed: on Python 3 map() returns an iterator
        #    that np.bincount cannot consume
        rounds_counts = np.bincount([round_of(d) for d in docs])
        assert np.all(rounds_counts > 0)
        assert np.all(rounds_counts[:-1] >= round_len)

        # -- this is the round of the trial we're going to suggest
        if rounds_counts[-1] >= round_len:
            my_round = len(rounds_counts)
        else:
            my_round = len(rounds_counts) - 1
        horizon = my_round - look_back
        consider_continuing = [d for d in docs
                               if horizon <= round_of(d) < my_round]

        if consider_continuing:
            cc = consider_continuing
            cc_losses = [self.bandit.loss(d['result'], d['spec'])
                         for d in cc]
            cont_idx = np.argmin(cc_losses)
            cont_decisions = cc[cont_idx]['result']['decisions']
            cont_tid = cc[cont_idx]['tid']
            assert cont_tid is not None
            assert new_ids[0] != cont_tid
            continuing_trials_docs = helper.continuing(cc[cont_idx])
        else:
            continuing_trials_docs = helper.continuing(None)

    # -- validate=False makes this a lot faster
    continuing_trials = trials_from_docs(continuing_trials_docs,
                                         exp_key=trials._exp_key,
                                         validate=False)
    new_trial_docs = self.sub_algo.suggest(new_ids, continuing_trials)

    for trial in new_trial_docs:
        # -- patch in decisions of the best current model from previous
        #    round
        # -- This is an assertion because the Bandit should be written
        #    to use these values, and thus be written with the awareness
        #    that they are coming...
        spec = trial['spec']
        assert spec['decisions'] is None
        spec['decisions'] = cont_decisions
        misc = trial['misc']
        assert 'boosting' not in misc
        assert trial['tid'] != cont_tid
        misc['boosting'] = {
            'variant': 'sync',
            'round': my_round,
            'continues': cont_tid}
    return new_trial_docs
def suggest(
    new_ids,
    domain,
    trials,
    sub_suggest,
    min_ok_per_round=1,
    min_valid_per_round=1,
    absolute_loss_thresh=1.0,
    relative_loss_thresh=None,
):
    """
    Suggest one new trial that extends the current ensemble chain.

    Starting from the root (no parent), the chain is followed from each
    parent to its best 'ok' child for as long as the thresholds below allow.
    The new trial is then produced by `sub_suggest`, which is shown only the
    valid children of the last parent reached.

    Parameters
    ----------

    new_ids - sequence containing exactly one trial id
        Unpacking fails with ValueError for any other length.

    domain - passed through unchanged to `sub_suggest`

    trials - the experiment history (iterable of trial documents)

    sub_suggest - callable(new_ids, domain, trials)
        The search algorithm that actually generates the trial documents.

    min_ok_per_round - int
        A trial cannot be extended in the ensemble until it has this many
        siblings with status 'ok' and a loss <= absolute_loss_thresh.

    min_valid_per_round - int
        A trial cannot be extended in the ensemble until it has this many
        siblings whose job state is not ERROR.

    absolute_loss_thresh - float
        Jobs with loss greater than this are not counted as 'ok'.

    relative_loss_thresh - None or float
        A child cannot become a parent in the ensemble unless it improves
        on its parent with a loss strictly less than
        relative_loss_thresh * parent_loss.

    This search algo works by injecting a ['misc']['boosting'] subdocument
    into every trial, with keys:
      * variant - identify the type of boosting at work
      * continues - the trial ID (tid) of the previously selected trial in
                    the ensemble, or `None` for first-round trials

    In order for boosting to work properly, the 'loss' reported by trial
    must represent the CUMULATIVE ENSEMBLE LOSS if the ensemble were to be
    extended to include that particular trial.

    """
    new_id, = new_ids
    valid_docs = [t for t in trials
                  if t['state'] != hyperopt.JOB_STATE_ERROR]

    # -- ok_docs are those which are eligible to be a member of the
    #    final ensemble.
    ok_docs = [t for t in valid_docs
               if t['result']['status'] == hyperopt.STATUS_OK
               and t['result']['loss'] <= absolute_loss_thresh]

    # -- arguments are passed to the logger (not pre-formatted) so the
    #    string is only built when the INFO level is enabled
    logger.info('n_ok: %i n_valid: %i', len(ok_docs), len(valid_docs))

    valid_helper = BoostHelper(valid_docs)
    ok_helper = BoostHelper(ok_docs)

    cur_parent = None
    cur_parent_tid = None
    while True:
        n_ok_children = len(ok_helper.children(cur_parent))
        n_valid_children = len(valid_helper.children(cur_parent))
        logger.info('cur_parent: %s  n_ok_children: %i  n_valid_children: %i',
                    None if cur_parent is None else cur_parent['tid'],
                    n_ok_children,
                    n_valid_children)
        if n_ok_children < min_ok_per_round:
            break
        if n_valid_children < min_valid_per_round:
            break

        best_child = ok_helper.best_child(cur_parent)
        assert best_child is not None  # -- because ok_helper has some elements
        if cur_parent is not None and relative_loss_thresh is not None:
            rel_thresh = cur_parent['result']['loss'] * relative_loss_thresh
            if best_child['result']['loss'] >= rel_thresh:
                break
        # -- %s rather than %i: trial ids are not guaranteed to be integers
        logger.info('best_child: %s', best_child['tid'])
        cur_parent = best_child
        cur_parent_tid = best_child['tid']
        del best_child

    cur_siblings = valid_helper.children(cur_parent)

    current_trials = trials_from_docs(
        cur_siblings,
        exp_key=trials._exp_key,
        # -- validate=False is much faster
        validate=False)

    new_trial_docs = sub_suggest([new_id], domain, current_trials)

    for trial in new_trial_docs:
        misc = trial['misc']
        # -- boosting cannot be nested with current data structure
        assert 'boosting' not in misc
        # -- I think the following was a debugging sanity check
        assert trial['tid'] == new_id
        misc['boosting'] = {
            'variant': {
                'name': 'async_suggest',
                'min_ok_per_round': min_ok_per_round,
                'min_valid_per_round': min_valid_per_round,
                'relative_loss_thresh': relative_loss_thresh,
                'absolute_loss_thresh': absolute_loss_thresh,
            },
            'continues': cur_parent_tid}

    return new_trial_docs
def idxs_vals_from_ids(self, ids, seed):
    """Run self.suggest on a fresh Trials() and return its (idxs, vals).

    The suggested documents are wrapped with trials_from_docs and their
    miscs are converted by miscs_to_idxs_vals.
    """
    suggested = self.suggest(ids, self.domain, Trials(), seed)
    miscs = trials_from_docs(suggested).miscs
    idxs, vals = miscs_to_idxs_vals(miscs)
    return idxs, vals
def suggest(
    new_ids,
    domain,
    trials,
    sub_suggest,
    min_ok_per_round=1,
    min_valid_per_round=1,
    absolute_loss_thresh=1.0,
    relative_loss_thresh=None,
):
    """
    Suggest one new trial that extends the current ensemble chain.

    The chain is walked from the root (no parent) to the best 'ok' child,
    repeatedly, until one of the thresholds below stops it.  `sub_suggest`
    then generates the new trial from the valid children of the last
    parent reached.

    Parameters
    ----------

    new_ids - sequence containing exactly one trial id
        Unpacking fails with ValueError for any other length.

    domain - passed through unchanged to `sub_suggest`

    trials - the experiment history (iterable of trial documents)

    sub_suggest - callable(new_ids, domain, trials)
        The search algorithm that actually generates the trial documents.

    min_ok_per_round - int
        A trial cannot be extended in the ensemble until it has this many
        siblings with status 'ok' and a loss <= absolute_loss_thresh.

    min_valid_per_round - int
        A trial cannot be extended in the ensemble until it has this many
        siblings whose job state is not ERROR.

    absolute_loss_thresh - float
        Jobs with loss greater than this are not counted as 'ok'.

    relative_loss_thresh - None or float
        A child cannot become a parent in the ensemble unless it improves
        on its parent with a loss strictly less than
        relative_loss_thresh * parent_loss.

    This search algo works by injecting a ['misc']['boosting'] subdocument
    into every trial, with keys:
      * variant - identify the type of boosting at work
      * continues - the trial ID (tid) of the previously selected trial in
                    the ensemble, or `None` for first-round trials

    In order for boosting to work properly, the 'loss' reported by trial
    must represent the CUMULATIVE ENSEMBLE LOSS if the ensemble were to be
    extended to include that particular trial.

    """
    new_id, = new_ids
    valid_docs = [
        t for t in trials
        if t['state'] != hyperopt.JOB_STATE_ERROR
    ]

    # -- ok_docs are those which are eligible to be a member of the
    #    final ensemble.
    ok_docs = [
        t for t in valid_docs
        if t['result']['status'] == hyperopt.STATUS_OK
        and t['result']['loss'] <= absolute_loss_thresh
    ]

    # -- logging arguments are passed separately so formatting is deferred
    #    until a handler actually emits the record
    logger.info('n_ok: %i n_valid: %i', len(ok_docs), len(valid_docs))

    valid_helper = BoostHelper(valid_docs)
    ok_helper = BoostHelper(ok_docs)

    cur_parent = None
    cur_parent_tid = None
    while True:
        n_ok_children = len(ok_helper.children(cur_parent))
        n_valid_children = len(valid_helper.children(cur_parent))
        logger.info(
            'cur_parent: %s  n_ok_children: %i  n_valid_children: %i',
            None if cur_parent is None else cur_parent['tid'],
            n_ok_children,
            n_valid_children,
        )
        if n_ok_children < min_ok_per_round:
            break
        if n_valid_children < min_valid_per_round:
            break

        best_child = ok_helper.best_child(cur_parent)
        assert best_child is not None  # -- because ok_helper has some elements
        if cur_parent is not None and relative_loss_thresh is not None:
            rel_thresh = cur_parent['result']['loss'] * relative_loss_thresh
            if best_child['result']['loss'] >= rel_thresh:
                break
        # -- %s rather than %i: a non-integer tid would make %i raise
        logger.info('best_child: %s', best_child['tid'])
        cur_parent = best_child
        cur_parent_tid = best_child['tid']
        del best_child

    cur_siblings = valid_helper.children(cur_parent)

    current_trials = trials_from_docs(
        cur_siblings,
        exp_key=trials._exp_key,
        # -- validate=False is much faster
        validate=False,
    )

    new_trial_docs = sub_suggest([new_id], domain, current_trials)

    for trial in new_trial_docs:
        misc = trial['misc']
        # -- boosting cannot be nested with current data structure
        assert 'boosting' not in misc
        # -- I think the following was a debugging sanity check
        assert trial['tid'] == new_id
        misc['boosting'] = {
            'variant': {
                'name': 'async_suggest',
                'min_ok_per_round': min_ok_per_round,
                'min_valid_per_round': min_valid_per_round,
                'relative_loss_thresh': relative_loss_thresh,
                'absolute_loss_thresh': absolute_loss_thresh,
            },
            'continues': cur_parent_tid,
        }

    return new_trial_docs