Example #1
0
 def test_suggest_1(self):
     """Suggest a single trial for id 0 and check its 'node_4' index.

     The index is checked twice: directly on the suggested document, and
     again after the documents are packed with `trials_from_docs` and
     unpacked with `miscs_to_idxs_vals`.
     """
     suggested = self.algo.suggest([0], Trials())
     assert len(suggested) == 1
     # -- assert validity of docs
     packed = trials_from_docs(suggested)
     first_misc = suggested[0]['misc']
     assert first_misc['idxs']['node_4'] == [0]
     all_idxs, _ = miscs_to_idxs_vals(packed.miscs)
     assert all_idxs['node_4'] == [0]
Example #2
0
    def test_seeding(self):
        """Pin the values `rand.suggest` draws for `coin_flip()` with seed=123."""
        # -- assert that the seeding works a particular way
        flip_domain = coin_flip()
        suggested = rand.suggest(
            list(range(10)), flip_domain, Trials(), seed=123)
        packed = trials_from_docs(suggested)
        all_idxs, all_vals = miscs_to_idxs_vals(packed.miscs)

        # Passes Nov 8 / 2013
        expected_idxs = list(range(10))
        expected_vals = [0, 1, 0, 0, 0, 0, 0, 1, 1, 0]
        self.assertEqual(list(all_idxs["flip"]), expected_idxs)
        self.assertEqual(list(all_vals["flip"]), expected_vals)
Example #3
0
 def test_suggest_1(self):
     """Suggest a single trial for id 0 and check its 'flip' index.

     Debug output uses single-argument ``print(...)`` calls with explicit
     %-formatting, so the text printed is the same on Python 2 and the
     method also parses on Python 3.
     """
     print('EXPR %s' % (self.bandit.expr,))
     docs = self.algo.suggest([0], Trials())
     assert len(docs) == 1
     print('DOCS %s' % (docs,))
     # -- assert validity of docs
     trials = trials_from_docs(docs)
     print('TRIALS %s' % (trials,))
     assert docs[0]['misc']['idxs']['flip'] == [0]
     idxs, vals = miscs_to_idxs_vals(trials.miscs)
     assert idxs['flip'] == [0]
Example #4
0
    def test_seeding(self):
        """Pin the values `rand.suggest` draws for `coin_flip()` with seed=123.

        Ranges are materialised with ``list(...)``: on Python 3 a ``range``
        object never compares equal to a list, so the original comparison
        against ``range(10)`` could not pass there.
        """
        # -- assert that the seeding works a particular way

        domain = coin_flip()
        docs = rand.suggest(list(range(10)), domain, Trials(), seed=123)
        trials = trials_from_docs(docs)
        idxs, vals = miscs_to_idxs_vals(trials.miscs)

        # Passes Nov 8 / 2013
        self.assertEqual(list(idxs['flip']), list(range(10)))
        self.assertEqual(list(vals['flip']), [0, 1, 0, 0, 0, 0, 0, 1, 1, 0])
Example #5
0
 def test_arbitrary_range(self):
     """Suggest trials for non-contiguous, mixed-type ids under 'node_4'.

     Checks that the ids come back unchanged as the 'node_4' idxs and that
     the drawn values match the pinned sequence.
     """
     new_ids = [-2, 0, 7, 'a', '007']
     docs = self.algo.suggest(new_ids, Trials())
     # -- assert validity of docs
     trials = trials_from_docs(docs)
     idxs, vals = miscs_to_idxs_vals(trials.miscs)
     assert len(docs) == 5
     assert len(idxs) == 1
     assert len(vals) == 1
     # -- single-argument print(...) behaves the same on Python 2 and 3
     print(vals)
     assert idxs['node_4'] == new_ids
     assert np.all(vals['node_4'] == [0, 1, 0, 1, 1])
Example #6
0
 def test_arbitrary_range(self):
     """Suggest trials for non-contiguous, mixed-type ids under 'flip'.

     Checks that the ids come back unchanged as the 'flip' idxs and that
     the drawn values match the pinned sequence.
     """
     new_ids = [-2, 0, 7, 'a', '007']
     docs = self.algo.suggest(new_ids, Trials())
     # -- assert validity of docs
     trials = trials_from_docs(docs)
     idxs, vals = miscs_to_idxs_vals(trials.miscs)
     assert len(docs) == 5
     assert len(idxs) == 1
     assert len(vals) == 1
     # -- single-argument print(...) behaves the same on Python 2 and 3
     print(vals)
     assert idxs['flip'] == new_ids
     assert np.all(vals['flip'] == [0, 1, 0, 1, 1])
Example #7
0
 def test_suggest_5(self):
     """Suggest trials for ids 0..4 and check the 'node_4' idxs and values.

     Ranges are materialised with ``list(...)`` because on Python 3 a list
     never compares equal to a ``range`` object.
     """
     docs = self.algo.suggest(list(range(5)), Trials())
     print(docs)
     assert len(docs) == 5
     # -- assert validity of docs
     trials = trials_from_docs(docs)
     idxs, vals = miscs_to_idxs_vals(trials.miscs)
     print(idxs)
     print(vals)
     assert len(idxs) == 1
     assert len(vals) == 1
     assert idxs['node_4'] == list(range(5))
     assert np.all(vals['node_4'] == [1, 1, 0, 1, 0])
Example #8
0
 def test_suggest_5(self):
     """Suggest trials for ids 0..4 and check the 'flip' idxs and values.

     Ranges are materialised with ``list(...)`` because on Python 3 a list
     never compares equal to a ``range`` object.
     """
     docs = self.algo.suggest(list(range(5)), Trials())
     print(docs)
     assert len(docs) == 5
     # -- assert validity of docs
     trials = trials_from_docs(docs)
     idxs, vals = miscs_to_idxs_vals(trials.miscs)
     print(idxs)
     print(vals)
     assert len(idxs) == 1
     assert len(vals) == 1
     assert idxs['flip'] == list(range(5))
     assert np.all(vals['flip'] == [1, 1, 0, 1, 0])
Example #9
0
    def test_arbitrary_range(self, N=10):
        """Suggest trials for the first N mixed-type ids and pin the 'flip' draws.

        Parameters
        ----------
        N - int
            Number of ids to use; at most 10, the length of the id list and
            of the pinned value sequence.
        """
        assert N <= 10
        new_ids = [-2, 0, 7, 'a', '007', 66, 'a3', '899', 23, 2333][:N]
        docs = self.algo.suggest(new_ids, Trials())
        # -- assert validity of docs
        trials = trials_from_docs(docs)
        idxs, vals = miscs_to_idxs_vals(trials.miscs)
        assert len(docs) == N
        assert len(idxs) == 1
        assert len(vals) == 1
        # -- single-argument print(...) behaves the same on Python 2 and 3
        print(vals)
        assert idxs['flip'] == new_ids

        # -- assert that the random seed matches that of Jan 8/2013
        assert np.all(vals['flip'] == [0, 1, 0, 0, 0, 0, 0, 1, 1, 0][:N])
Example #10
0
 def test_suggest_N(self, N=10):
     """Suggest trials for ids 0..N-1 and pin the 'flip' idxs and values.

     Parameters
     ----------
     N - int
         Number of ids to suggest; at most 10, the length of the pinned
         value sequence.

     Ranges are materialised with ``list(...)`` because on Python 3 a list
     never compares equal to a ``range`` object.
     """
     assert N <= 10
     docs = self.algo.suggest(list(range(N)), Trials())
     # -- %-formatted single-argument prints give identical text on
     #    Python 2 and Python 3
     print('docs %s' % (docs,))
     assert len(docs) == N
     # -- assert validity of docs
     trials = trials_from_docs(docs)
     idxs, vals = miscs_to_idxs_vals(trials.miscs)
     print('idxs %s' % (idxs,))
     print('vals %s' % (vals,))
     assert len(idxs) == 1
     assert len(vals) == 1
     assert idxs['flip'] == list(range(N))
     # -- NOTE(review): this was annotated "only works when N == 5", yet
     #    the expectation is sliced to N and N defaults to 10 -- confirm.
     assert np.all(vals['flip'] == [0, 1, 0, 0, 0, 0,  0, 1, 1, 0][:N])
Example #11
0
    def suggest(self, new_ids, trials):
        """Suggest new trials for the current synchronous boosting round.

        Rounds are computed (via `BoostHelper.round_of`) for the trials
        returned by `filter_ok_trials(trials)`.  If the latest round already
        has at least `self.round_len` results carrying a 'loss', a new round
        is opened that continues the lowest-loss trial of the latest round.
        Otherwise the latest round keeps being filled.  When there are no
        such trials, round 0 is started with no decisions.

        Parameters
        ----------
        new_ids
            Trial ids, forwarded unchanged to `self.sub_algo.suggest`.
        trials
            Trials container; `.trials`, `._exp_key` and iteration over its
            trial documents are used.

        Returns
        -------
        The documents returned by `self.sub_algo.suggest`, modified in place:
        spec['decisions'] is filled in and misc['boosting'] is set.

        Raises
        ------
        AssertionError
            If round numbers are not contiguous from 0, if a round before the
            last is not full and fully completed, if trials within a round
            disagree on their decisions, or if a suggested spec already
            carries decisions.
        """
        round_len = self.round_len
        helper = BoostHelper(trials.trials)

        specs, results, miscs = filter_ok_trials(trials)

        if miscs:
            rounds = [helper.round_of(m) for m in miscs]
            # -- actually the rounds of completed trials
            complete_rounds = [helper.round_of(m)
                    for m, r in zip(miscs, results) if 'loss' in r]

            max_round = max(rounds)
            urounds = np.unique(rounds)
            urounds.sort()
            # -- compare against a list: on Python 3 a range object never
            #    equals a list, so the bare range() would always fail here
            assert list(urounds) == list(range(max_round + 1))

            rounds_counts = [rounds.count(j) for j in urounds]
            complete_rounds_counts = [complete_rounds.count(j)
                    for j in urounds]
            # -- every round except the last must be full, with all of its
            #    trials completed
            assert all([rc == crc >= round_len
                for rc, crc in zip(rounds_counts[:-1],
                    complete_rounds_counts[:-1])])

            round_decs = [[s['decisions']
                for m, s in zip(miscs, specs)
                if helper.round_of(m) == j] for j in urounds]
            # -- all trials of one round must share the same decisions
            assert all([all([_rd == rd[0]
                for _rd in rd]) for rd in round_decs])
            round_decs = [rd[0] for rd in round_decs]

            if complete_rounds_counts[-1] >= round_len:
                # -- latest round is done: start a new round from its best trial
                my_round = max_round + 1
                last_specs = [s
                        for s, m in zip(specs, miscs)
                        if helper.round_of(m) == max_round]
                last_results = [r
                        for r, m in zip(results, miscs)
                        if helper.round_of(m) == max_round]
                last_miscs = [m for m in miscs
                        if helper.round_of(m) == max_round]
                # -- explicit list: np.array(map(...)) would wrap the lazy
                #    map object in a 0-d object array on Python 3
                losses = np.array([self.bandit.loss(r, s)
                        for r, s in zip(last_results, last_specs)])
                last_best = losses.argmin()
                decisions = last_results[last_best]['decisions']
                decisions_src = last_miscs[last_best]['tid']
            else:
                # -- latest round still needs trials: reuse its decisions
                my_round = max_round
                decisions = round_decs[-1]
                decisions_src = helper.continues(miscs[-1])
                if decisions_src is not None:
                    decisions_src = decisions_src['tid']
        else:
            decisions = None
            my_round = 0
            decisions_src = None

        # -- the sub-algorithm only sees the trials of the round being filled
        selected_trial_docs = [t for t in trials
                                  if helper.round_of(t) == my_round]
        selected_trials = trials_from_docs(selected_trial_docs,
                                                  exp_key=trials._exp_key)
        new_trial_docs = self.sub_algo.suggest(new_ids, selected_trials)

        for trial in new_trial_docs:
            # -- patch in decisions of the best current model from previous
            #    round
            spec = trial['spec']
            assert spec['decisions'] is None
            spec['decisions'] = decisions

            misc = trial['misc']
            misc['boosting'] = {
                    'variant': 'sync',
                    'round': my_round,
                    'continues': decisions_src}

        return new_trial_docs
Example #12
0
    def suggest(self, new_ids, trials):
        """Suggest one new trial, continuing the best trial of recent rounds.

        Only trials whose `self.bandit.status(result, spec)` is STATUS_OK are
        considered, after repeatedly dropping trials whose misc 'from_tid'
        points to a tid that is no longer in the list.  The new trial's round
        is the latest round if it holds fewer than `self.round_len` trials,
        otherwise the next one.  Among trials from the `self.look_back`
        rounds preceding the new round, the one with the lowest
        `self.bandit.loss` is continued.

        Parameters
        ----------
        new_ids
            Ids for the new trials; more than one is not supported.
        trials
            Trials container; iteration over its trial documents and
            `._exp_key` are used.

        Returns
        -------
        The documents returned by `self.sub_algo.suggest`, modified in place:
        spec['decisions'] is filled in and misc['boosting'] is set.

        Raises
        ------
        NotImplementedError
            If more than one id is requested.
        AssertionError
            If the round counts are inconsistent, or if a suggested trial
            already has decisions or a 'boosting' entry.
        """
        if len(new_ids) > 1:
            raise NotImplementedError()

        STATUS_OK = hyperopt.STATUS_OK
        docs = [d for d in trials
                if self.bandit.status(d['result'], d['spec']) == STATUS_OK]
        # -- This suggest() implementation requires that there are no dangling
        #    from_tid pointers, so this loop strips them out of the docs list.
        #    Removing a doc can orphan others, hence the repeat to a fixpoint.
        while True:
            tids = {d['tid'] for d in docs}
            docs_ = [d for d in docs
                    if d['misc'].get('from_tid', d['tid']) in tids]
            if len(docs_) == len(docs):
                break
            docs = docs_

        helper = BoostHelper(docs)
        round_of = helper.round_of

        round_len = self.round_len
        look_back = self.look_back

        my_round = 0
        cont_decisions = None
        cont_tid = None
        continuing_trials_docs = []

        if docs:
            # -- pick a trial to continue
            # -- explicit list: np.bincount cannot consume the lazy map
            #    object that map() returns on Python 3
            rounds_counts = np.bincount([round_of(d) for d in docs])
            assert np.all(rounds_counts > 0)
            assert np.all(rounds_counts[:-1] >= round_len)
            # -- this is the round of the trial we're going to suggest
            if rounds_counts[-1] >= round_len:
                my_round = len(rounds_counts)
            else:
                my_round = len(rounds_counts) - 1
            horizon = my_round - look_back
            consider_continuing = [d for d in docs
                    if horizon <= round_of(d) < my_round]

            if consider_continuing:
                cc = consider_continuing
                # -- explicit list so np.argmin sees a sequence of losses on
                #    both Python 2 and Python 3
                cc_losses = [self.bandit.loss(d['result'], d['spec'])
                             for d in cc]
                cont_idx = np.argmin(cc_losses)

                cont_decisions = cc[cont_idx]['result']['decisions']
                cont_tid = cc[cont_idx]['tid']
                assert cont_tid is not None
                assert new_ids[0] != cont_tid
                continuing_trials_docs = helper.continuing(cc[cont_idx])
            else:
                continuing_trials_docs = helper.continuing(None)

        # -- validate=False makes this a lot faster
        continuing_trials = trials_from_docs(continuing_trials_docs,
                                            exp_key=trials._exp_key,
                                            validate=False)

        new_trial_docs = self.sub_algo.suggest(new_ids, continuing_trials)

        for trial in new_trial_docs:
            # -- patch in decisions of the best current model from previous
            #    round
            # -- This is an assertion because the Bandit should be written
            #    to use these values, and thus be written with the awareness
            #    that they are coming...
            spec = trial['spec']
            assert spec['decisions'] is None
            spec['decisions'] = cont_decisions

            misc = trial['misc']
            assert 'boosting' not in misc
            assert trial['tid'] != cont_tid
            misc['boosting'] = {
                    'variant': 'sync',
                    'round': my_round,
                    'continues': cont_tid}

        return new_trial_docs
Example #13
0
def suggest(
    new_ids, domain, trials, sub_suggest,
    min_ok_per_round=1,
    min_valid_per_round=1,
    absolute_loss_thresh=1.0,
    relative_loss_thresh=None,
    ):
    """
    Suggest one new trial that extends the current boosting ensemble.

    Starting with no parent, the ensemble is walked by repeatedly moving to
    the best 'ok' child of the current parent, for as long as that parent has
    enough ok and valid children and the child passes the relative loss test.
    The new trial is then suggested by `sub_suggest` among the valid children
    of the last parent reached, and is marked as continuing that parent.

    Parameters
    ----------

    new_ids - sequence holding exactly one trial id
        Unpacking raises ValueError for any other length.

    domain
        Passed through unchanged to `sub_suggest`.

    trials
        Iterable of trial documents; `trials._exp_key` is also read.

    sub_suggest - callable
        Called as `sub_suggest([new_id], domain, current_trials)`; must return
        trial documents whose 'tid' is `new_id`.

    min_ok_per_round - int
        A trial cannot be extended in the ensemble until it has this many
        siblings with status 'ok' and a loss <= absolute_loss_thresh.

    min_valid_per_round - int
        A trial cannot be extended in the ensemble until it has this many
        siblings whose job state is not ERROR.

    absolute_loss_thresh - float
        Jobs with loss greater than this are not counted as 'ok'.

    relative_loss_thresh - None or float
        A child cannot become a parent in the ensemble unless it improves on its
        parent with a loss < relative_loss_thresh * parent_loss.

    Returns
    -------
    The trial documents returned by `sub_suggest`, each with a
    ['misc']['boosting'] subdocument added in place.

    This search algo works by injecting a ['misc']['boosting'] subdocument into
    every trial, with keys:
      * variant - identify the type of boosting at work
      * continues - the trial ID (tid) of the previously selected trial in the
                    ensemble, or `None` for first-round trials

    In order for boosting to work properly, the 'loss' reported by trial must
    represent the CUMULATIVE ENSEMBLE LOSS if the ensemble were to be extended
    to include that particular trial.

    """
    new_id, = new_ids

    valid_docs = [t for t in trials
            if t['state'] != hyperopt.JOB_STATE_ERROR]

    # -- ok_docs are those which are eligible to be a member of the
    #    final ensemble.
    ok_docs = [t for t in valid_docs
            if t['result']['status'] == hyperopt.STATUS_OK
               and t['result']['loss'] <= absolute_loss_thresh]

    logger.info('n_ok: %i n_valid: %i', len(ok_docs), len(valid_docs))

    valid_helper = BoostHelper(valid_docs)
    ok_helper = BoostHelper(ok_docs)

    cur_parent = None
    cur_parent_tid = None
    while True:
        n_ok_children = len(ok_helper.children(cur_parent))
        n_valid_children = len(valid_helper.children(cur_parent))
        logger.info('cur_parent: %s  n_ok_children: %i  n_valid_children: %i',
                    cur_parent_tid,
                    n_ok_children,
                    n_valid_children)
        if n_ok_children < min_ok_per_round:
            break
        if n_valid_children < min_valid_per_round:
            break

        best_child = ok_helper.best_child(cur_parent)
        assert best_child is not None  # -- because ok_helper has some elements

        if cur_parent is not None and relative_loss_thresh is not None:
            rel_thresh = cur_parent['result']['loss'] * relative_loss_thresh
            if best_child['result']['loss'] >= rel_thresh:
                break

        # -- %s rather than %i: a tid that is not an integer must not make
        #    the log call fail
        logger.info('best_child: %s', best_child['tid'])
        cur_parent = best_child
        cur_parent_tid = best_child['tid']
        del best_child

    cur_siblings = valid_helper.children(cur_parent)

    current_trials = trials_from_docs(
            cur_siblings,
            exp_key=trials._exp_key,
            # -- validate=False is much faster
            validate=False)

    new_trial_docs = sub_suggest([new_id], domain, current_trials)

    for trial in new_trial_docs:
        misc = trial['misc']
        # -- boosting cannot be nested with current data structure
        assert 'boosting' not in misc
        # -- I think the following was a debugging sanity check
        assert trial['tid'] == new_id
        misc['boosting'] = {
            'variant': {
                'name': 'async_suggest',
                'min_ok_per_round': min_ok_per_round,
                'min_valid_per_round': min_valid_per_round,
                'relative_loss_thresh': relative_loss_thresh,
                'absolute_loss_thresh': absolute_loss_thresh,
                },
            'continues': cur_parent_tid}

    return new_trial_docs
Example #14
0
 def idxs_vals_from_ids(self, ids, seed):
     """Return the (idxs, vals) pair for trials suggested for `ids`.

     Calls `self.suggest` with `self.domain`, an empty `Trials()` and
     `seed`, then packs the resulting docs and unpacks their miscs.
     """
     suggested = self.suggest(ids, self.domain, Trials(), seed)
     packed = trials_from_docs(suggested)
     all_idxs, all_vals = miscs_to_idxs_vals(packed.miscs)
     return all_idxs, all_vals
def suggest(
    new_ids,
    domain,
    trials,
    sub_suggest,
    min_ok_per_round=1,
    min_valid_per_round=1,
    absolute_loss_thresh=1.0,
    relative_loss_thresh=None,
):
    """
    Suggest one new trial that extends the current boosting ensemble.

    Starting with no parent, the ensemble is walked by repeatedly moving to
    the best 'ok' child of the current parent, for as long as that parent has
    enough ok and valid children and the child passes the relative loss test.
    The new trial is then suggested by `sub_suggest` among the valid children
    of the last parent reached, and is marked as continuing that parent.

    Parameters
    ----------

    new_ids - sequence holding exactly one trial id
        Unpacking raises ValueError for any other length.

    domain
        Passed through unchanged to `sub_suggest`.

    trials
        Iterable of trial documents; `trials._exp_key` is also read.

    sub_suggest - callable
        Called as `sub_suggest([new_id], domain, current_trials)`; must return
        trial documents whose 'tid' is `new_id`.

    min_ok_per_round - int
        A trial cannot be extended in the ensemble until it has this many
        siblings with status 'ok' and a loss <= absolute_loss_thresh.

    min_valid_per_round - int
        A trial cannot be extended in the ensemble until it has this many
        siblings whose job state is not ERROR.

    absolute_loss_thresh - float
        Jobs with loss greater than this are not counted as 'ok'.

    relative_loss_thresh - None or float
        A child cannot become a parent in the ensemble unless it improves on its
        parent with a loss < relative_loss_thresh * parent_loss.

    Returns
    -------
    The trial documents returned by `sub_suggest`, each with a
    ['misc']['boosting'] subdocument added in place.

    This search algo works by injecting a ['misc']['boosting'] subdocument into
    every trial, with keys:
      * variant - identify the type of boosting at work
      * continues - the trial ID (tid) of the previously selected trial in the
                    ensemble, or `None` for first-round trials

    In order for boosting to work properly, the 'loss' reported by trial must
    represent the CUMULATIVE ENSEMBLE LOSS if the ensemble were to be extended
    to include that particular trial.

    """
    new_id, = new_ids

    valid_docs = [t for t in trials if t['state'] != hyperopt.JOB_STATE_ERROR]

    # -- ok_docs are those which are eligible to be a member of the
    #    final ensemble.
    ok_docs = [
        t for t in valid_docs if t['result']['status'] == hyperopt.STATUS_OK
        and t['result']['loss'] <= absolute_loss_thresh
    ]

    logger.info('n_ok: %i n_valid: %i', len(ok_docs), len(valid_docs))

    valid_helper = BoostHelper(valid_docs)
    ok_helper = BoostHelper(ok_docs)

    cur_parent = None
    cur_parent_tid = None
    while True:
        n_ok_children = len(ok_helper.children(cur_parent))
        n_valid_children = len(valid_helper.children(cur_parent))
        logger.info('cur_parent: %s  n_ok_children: %i  n_valid_children: %i',
                    cur_parent_tid, n_ok_children, n_valid_children)
        if n_ok_children < min_ok_per_round:
            break
        if n_valid_children < min_valid_per_round:
            break

        best_child = ok_helper.best_child(cur_parent)
        assert best_child is not None  # -- because ok_helper has some elements

        if cur_parent is not None and relative_loss_thresh is not None:
            rel_thresh = cur_parent['result']['loss'] * relative_loss_thresh
            if best_child['result']['loss'] >= rel_thresh:
                break

        # -- %s rather than %i: a tid that is not an integer must not make
        #    the log call fail
        logger.info('best_child: %s', best_child['tid'])
        cur_parent = best_child
        cur_parent_tid = best_child['tid']
        del best_child

    cur_siblings = valid_helper.children(cur_parent)

    current_trials = trials_from_docs(
        cur_siblings,
        exp_key=trials._exp_key,
        # -- validate=False is much faster
        validate=False)

    new_trial_docs = sub_suggest([new_id], domain, current_trials)

    for trial in new_trial_docs:
        misc = trial['misc']
        # -- boosting cannot be nested with current data structure
        assert 'boosting' not in misc
        # -- I think the following was a debugging sanity check
        assert trial['tid'] == new_id
        misc['boosting'] = {
            'variant': {
                'name': 'async_suggest',
                'min_ok_per_round': min_ok_per_round,
                'min_valid_per_round': min_valid_per_round,
                'relative_loss_thresh': relative_loss_thresh,
                'absolute_loss_thresh': absolute_loss_thresh,
            },
            'continues': cur_parent_tid
        }

    return new_trial_docs