Example #1
0
        def work(self):
            bandit = self.bandit
            random_algo = Random(bandit)
            # build an experiment of 10 trials
            trials = Trials()
            exp = Experiment(trials, random_algo)
            #print random_algo.s_specs_idxs_vals
            exp.run(10)
            ids = trials.tids
            assert len(ids) == 10
            tpe_algo = TreeParzenEstimator(bandit)
            #print pyll.as_apply(tpe_algo.post_idxs)
            #print pyll.as_apply(tpe_algo.post_vals)
            argmemo = {}

            print trials.miscs
            idxs, vals = miscs_to_idxs_vals(trials.miscs)
            argmemo[tpe_algo.observed['idxs']] = idxs
            argmemo[tpe_algo.observed['vals']] = vals
            argmemo[tpe_algo.observed_loss['idxs']] = trials.tids
            argmemo[tpe_algo.observed_loss['vals']] = trials.losses()
            stuff = pyll.rec_eval(
                [tpe_algo.post_below['idxs'], tpe_algo.post_below['vals']],
                memo=argmemo)
            print stuff
Example #2
0
def test_opt_qn_normal(f=hp_normal):
    bandit = Bandit(
            {'loss': -scope.sum([f('v%i' % ii, 0, 1)
                for ii in range(25)]) ** 2},
            loss_target=0)
    algo = TreeParzenEstimator(bandit,
            prior_weight=.5,
            n_startup_jobs=0,
            n_EI_candidates=1,
            gamma=0.15)
    trials = Trials()
    experiment = Experiment(trials, algo, async=False)
    experiment.max_queue_len = 1
    experiment.run(40)
    print list(sorted(trials.losses()))

    idxs, vals = miscs_to_idxs_vals(trials.miscs)

    if 1:
        import hyperopt.plotting
        hyperopt.plotting.main_plot_vars(trials, bandit, do_show=1)
    else:
        import matplotlib.pyplot as plt
        begin = [v[:10] for k, v in vals.items()]
        end = [v[-10:] for k, v in vals.items()]
        plt.subplot(2, 1, 1)
        plt.title('before')
        plt.hist(np.asarray(begin).flatten())
        plt.subplot(2, 1, 2)
        plt.title('after')
        plt.hist(np.asarray(end).flatten())
        plt.show()
def test_injector(trials):
    # -- test is disabled because CoinFlipInjector is gone
    # The point of the test would be to ensure that there is no problem
    # submitting jobs from worker processes.

    CoinFlipInjector = None  # XXX find old def with `git grep`
    bandit_algo = hyperopt.Random(CoinFlipInjector(), cmd=("bandit_json evaluate", "hyperopt.base.CoinFlipInjector"))
    # -- also test that injections from a particular experiment (exp_key)
    #    are visible only within that experiment.
    view2 = trials.view(exp_key="fff")
    view3 = trials.view(exp_key="asdf")
    assert len(trials) == 0
    exp = Experiment(view2, bandit_algo, max_queue_len=1, async=True)
    exp.run(1, block_until_done=True)
    ##even though we ran 1 trial, there are 2 results because one was injected
    trials.refresh()
    view2.refresh()
    view3.refresh()
    assert len(trials) == 2
    assert len(view2) == 2
    assert len(view3) == 0

    exp.run(1, block_until_done=True)
    trials.refresh()
    view2.refresh()
    view3.refresh()
    assert len(trials) == 4
    assert len(view2) == 4
    assert len(view3) == 0

    tids = [d["tid"] for d in trials]
    for doc in trials:
        if "from_tid" in doc["misc"]:
            assert doc["misc"]["from_tid"] in tids
        assert doc["exp_key"] == view2._exp_key
Example #4
0
 def work(self):
     """Run 10 TPE trials on `self.bandit` with n_EI_candidates set to 3."""
     history = Trials()
     algo = TreeParzenEstimator(self.bandit)
     algo.n_EI_candidates = 3
     Experiment(history, algo).run(10)
Example #5
0
        def work(self):
            bandit = self.bandit
            random_algo = Random(bandit)
            # build an experiment of 10 trials
            trials = Trials()
            exp = Experiment(trials, random_algo)
            #print random_algo.s_specs_idxs_vals
            exp.run(10)
            ids = trials.tids
            assert len(ids) == 10
            tpe_algo = TreeParzenEstimator(bandit)
            #print pyll.as_apply(tpe_algo.post_idxs)
            #print pyll.as_apply(tpe_algo.post_vals)
            argmemo = {}

            print trials.miscs
            idxs, vals = miscs_to_idxs_vals(trials.miscs)
            argmemo[tpe_algo.observed['idxs']] = idxs
            argmemo[tpe_algo.observed['vals']] = vals
            argmemo[tpe_algo.observed_loss['idxs']] = trials.tids
            argmemo[tpe_algo.observed_loss['vals']] = trials.losses()
            stuff = pyll.rec_eval([tpe_algo.post_below['idxs'],
                        tpe_algo.post_below['vals']],
                        memo=argmemo)
            print stuff
Example #6
0
 def work(self):
     """Smoke-run TPE: 10 trials on `self.bandit`, 3 EI candidates."""
     results = Trials()
     tpe = TreeParzenEstimator(self.bandit)
     tpe.n_EI_candidates = 3
     runner = Experiment(results, tpe)
     runner.run(10)
Example #7
0
    def work(self):

        bandit = self.bandit
        assert bandit.name is not None
        print 'Bandit', bandit.name
        algo = TreeParzenEstimator(
            bandit,
            gamma=self.gammas.get(bandit.name, TreeParzenEstimator.gamma),
            prior_weight=self.prior_weights.get(
                bandit.name, TreeParzenEstimator.prior_weight),
            n_EI_candidates=self.n_EIs.get(
                bandit.name, TreeParzenEstimator.n_EI_candidates),
        )
        LEN = self.LEN.get(bandit.name, 50)

        trials = Trials()
        exp = Experiment(trials, algo)
        exp.catch_bandit_exceptions = False
        exp.run(LEN)
        assert len(trials) == LEN

        if 1:
            rtrials = Trials()
            exp = Experiment(rtrials, Random(bandit))
            exp.run(LEN)
            print 'RANDOM MINS', list(sorted(rtrials.losses()))[:6]
            #logx = np.log([s['x'] for s in rtrials.specs])
            #print 'RND MEAN', np.mean(logx)
            #print 'RND STD ', np.std(logx)

        print algo.n_EI_candidates
        print algo.gamma
        print algo.prior_weight

        if 0:
            plt.subplot(2, 2, 1)
            plt.scatter(range(LEN), trials.losses())
            plt.title('TPE losses')
            plt.subplot(2, 2, 2)
            plt.scatter(range(LEN), ([s['x'] for s in trials.specs]))
            plt.title('TPE x')
            plt.subplot(2, 2, 3)
            plt.title('RND losses')
            plt.scatter(range(LEN), rtrials.losses())
            plt.subplot(2, 2, 4)
            plt.title('RND x')
            plt.scatter(range(LEN), ([s['x'] for s in rtrials.specs]))
            plt.show()
        if 0:
            plt.hist([t['x'] for t in self.experiment.trials], bins=20)

        #print trials.losses()
        print 'TPE    MINS', list(sorted(trials.losses()))[:6]
        #logx = np.log([s['x'] for s in trials.specs])
        #print 'TPE MEAN', np.mean(logx)
        #print 'TPE STD ', np.std(logx)
        thresh = self.thresholds[bandit.name]
        print 'Thresh', thresh
        assert min(trials.losses()) < thresh
Example #8
0
 def test_basic(self):
     bandit = self._bandit_cls()
     algo = Random(bandit)
     trials = Trials()
     experiment = Experiment(trials, algo, async=False)
     experiment.max_queue_len = 50
     experiment.run(self._n_steps)
     print
     print self._bandit_cls
     print bandit.loss_target
     print trials.average_best_error(bandit)
     assert trials.average_best_error(bandit) - bandit.loss_target  < .2
     print
Example #9
0
 def test_basic(self):
     bandit = self._bandit_cls()
     print 'bandit params', bandit.params
     algo = Random(bandit)
     print 'algo params', algo.vh.params
     trials = Trials()
     experiment = Experiment(trials, algo, async=False)
     experiment.catch_bandit_exceptions = False
     experiment.max_queue_len = 50
     experiment.run(self._n_steps)
     print
     print self._bandit_cls
     print bandit.loss_target
     print trials.average_best_error(bandit)
     assert trials.average_best_error(bandit) - bandit.loss_target  < .2
     print
Example #10
0
 def setUp(self):
     bandit = self.bandit = many_dists()
     algo = TreeParzenEstimator(bandit)
     trials = Trials()
     experiment = Experiment(trials, algo, async=False)
     experiment.max_queue_len = 1
     N=200
     if 0:
         import cProfile
         stats = cProfile.runctx('experiment.run(N)', globals={},
                 locals=locals(), filename='fooprof')
         import pstats
         p = pstats.Stats('fooprof')
         p.sort_stats('cumulative').print_stats(10)
         p.sort_stats('time').print_stats(10)
     else:
         experiment.run(N)
     self.trials = trials
Example #11
0
def test_injector(trials):
    bandit_algo = hyperopt.Random(CoinFlipInjector(),
                 cmd=('bandit_json evaluate','hyperopt.base.CoinFlipInjector'))
    # -- also test that injections from a particular experiment (exp_key)
    #    are visible only within that experiment.
    view2 = trials.view(exp_key='fff')
    view3 = trials.view(exp_key='asdf')
    assert len(trials) == 0
    exp = Experiment(view2, bandit_algo, max_queue_len=1, async=True)
    exp.run(1, block_until_done=True)
    ##even though we ran 1 trial, there are 2 results because one was injected
    trials.refresh()
    view2.refresh()
    view3.refresh()
    assert len(trials) == 2
    assert len(view2) == 2
    assert len(view3) == 0

    exp.run(1, block_until_done=True)
    trials.refresh()
    view2.refresh()
    view3.refresh()
    assert len(trials) == 4
    assert len(view2) == 4
    assert len(view3) == 0

    tids = [d['tid'] for d in trials]
    for doc in trials:
        if 'from_tid' in doc['misc']:
            assert doc['misc']['from_tid'] in tids
        assert doc['exp_key'] == view2._exp_key
Example #12
0
def test_injector(trials):
    # -- test is disabled because CoinFlipInjector is gone
    # The point of the test would be to ensure that there is no problem
    # submitting jobs from worker processes.

    CoinFlipInjector = None  # XXX find old def with `git grep`
    bandit_algo = hyperopt.Random(CoinFlipInjector(),
                                  cmd=('bandit_json evaluate',
                                       'hyperopt.base.CoinFlipInjector'))
    # -- also test that injections from a particular experiment (exp_key)
    #    are visible only within that experiment.
    view2 = trials.view(exp_key='fff')
    view3 = trials.view(exp_key='asdf')
    assert len(trials) == 0
    exp = Experiment(view2, bandit_algo, max_queue_len=1, async=True)
    exp.run(1, block_until_done=True)
    ##even though we ran 1 trial, there are 2 results because one was injected
    trials.refresh()
    view2.refresh()
    view3.refresh()
    assert len(trials) == 2
    assert len(view2) == 2
    assert len(view3) == 0

    exp.run(1, block_until_done=True)
    trials.refresh()
    view2.refresh()
    view3.refresh()
    assert len(trials) == 4
    assert len(view2) == 4
    assert len(view3) == 0

    tids = [d['tid'] for d in trials]
    for doc in trials:
        if 'from_tid' in doc['misc']:
            assert doc['misc']['from_tid'] in tids
        assert doc['exp_key'] == view2._exp_key
Example #13
0
    def work(self):
        """
        Run a small experiment with several workers running in parallel
        using Python threads.
        """
        n_threads = self.n_threads
        jobs_per_thread = self.jobs_per_thread
        n_trials_per_exp = n_threads * jobs_per_thread
        n_trials_total = n_trials_per_exp * len(self.exp_keys)
        
        with TempMongo() as tm:
            mj = tm.mongo_jobs('foodb')
            def newth(ii):
                n_jobs = jobs_per_thread * len(self.exp_keys)
                return threading.Thread(
                        target=self.worker_thread_fn,
                        args=(('hostname', ii), n_jobs, 30.0))
            threads = map(newth, range(n_threads))
            [th.start() for th in threads]

            exp_list = []
            trials_list = []
            try:
                for key in self.exp_keys:
                    print 'running experiment'
                    trials = MongoTrials(tm.connection_string('foodb'), key)
                    assert len(trials) == 0
                    if hasattr(self, 'prep_trials'):
                        self.prep_trials(trials)
                    bandit = self.bandit
                    if self.use_stop:
                        bandit_algo = RandomStop(n_threads * jobs_per_thread,
                                                    self.bandit, cmd=self.cmd)
                        print bandit_algo
                        exp = Experiment(trials, bandit_algo, max_queue_len=1)
                        exp.run(sys.maxint, block_until_done=False)
                    else:
                        bandit_algo = Random(self.bandit, cmd=self.cmd)
                        exp = Experiment(trials, bandit_algo,
                                                       max_queue_len=10000)
                        exp.run(n_threads * jobs_per_thread,
                                 block_until_done=(len(self.exp_keys) == 1))
                    exp_list.append(exp)
                    trials_list.append(trials)
            finally:
                print 'joining worker thread...'
                [th.join() for th in threads]

            for exp in exp_list:
                exp.block_until_done()

            for trials in trials_list:
                assert trials.count_by_state_synced(JOB_STATE_DONE)\
                        == n_trials_per_exp, (trials.count_by_state_synced(JOB_STATE_DONE), n_trials_per_exp)
                assert trials.count_by_state_unsynced(JOB_STATE_DONE)\
                        == n_trials_per_exp
                assert len(trials) == n_trials_per_exp, (
                    'trials failure %d %d ' % (len(trials) , n_trials_per_exp))
                assert len(trials.results) == n_trials_per_exp, (
                    'results failure %d %d ' % (len(trials.results),
                        n_trials_per_exp))
            all_trials = MongoTrials(tm.connection_string('foodb'))
            assert len(all_trials) == n_trials_total
Example #14
0
    def work(self):

        bandit = self.bandit
        assert bandit.name is not None
        print 'Bandit', bandit.name
        algo = TreeParzenEstimator(bandit,
                gamma=self.gammas.get(bandit.name,
                    TreeParzenEstimator.gamma),
                prior_weight=self.prior_weights.get(bandit.name,
                    TreeParzenEstimator.prior_weight),
                n_EI_candidates=self.n_EIs.get(bandit.name,
                    TreeParzenEstimator.n_EI_candidates),
                )
        LEN = self.LEN.get(bandit.name, 50)

        trials = Trials()
        exp = Experiment(trials, algo)
        exp.catch_bandit_exceptions = False
        exp.run(LEN)
        assert len(trials) == LEN

        if 1:
            rtrials = Trials()
            exp = Experiment(rtrials, Random(bandit))
            exp.run(LEN)
            print 'RANDOM MINS', list(sorted(rtrials.losses()))[:6]
            #logx = np.log([s['x'] for s in rtrials.specs])
            #print 'RND MEAN', np.mean(logx)
            #print 'RND STD ', np.std(logx)

        print algo.n_EI_candidates
        print algo.gamma
        print algo.prior_weight

        if 0:
            plt.subplot(2, 2, 1)
            plt.scatter(range(LEN), trials.losses())
            plt.title('TPE losses')
            plt.subplot(2, 2, 2)
            plt.scatter(range(LEN), ([s['x'] for s in trials.specs]))
            plt.title('TPE x')
            plt.subplot(2, 2, 3)
            plt.title('RND losses')
            plt.scatter(range(LEN), rtrials.losses())
            plt.subplot(2, 2, 4)
            plt.title('RND x')
            plt.scatter(range(LEN), ([s['x'] for s in rtrials.specs]))
            plt.show()
        if 0:
            plt.hist(
                    [t['x'] for t in self.experiment.trials],
                    bins=20)

        #print trials.losses()
        print 'TPE    MINS', list(sorted(trials.losses()))[:6]
        #logx = np.log([s['x'] for s in trials.specs])
        #print 'TPE MEAN', np.mean(logx)
        #print 'TPE STD ', np.std(logx)
        thresh = self.thresholds[bandit.name]
        print 'Thresh', thresh
        assert min(trials.losses()) < thresh
Example #15
0
    def work(self, **kwargs):
        self.__dict__.update(kwargs)
        bandit = opt_q_uniform(self.target)
        prior_weight = 2.5
        gamma = 0.20
        algo = TreeParzenEstimator(bandit,
                prior_weight=prior_weight,
                n_startup_jobs=2,
                n_EI_candidates=128,
                gamma=gamma)
        print algo.opt_idxs['x']
        print algo.opt_vals['x']

        trials = Trials()
        experiment = Experiment(trials, algo)
        experiment.run(self.LEN)
        if self.show_vars:
            import hyperopt.plotting
            hyperopt.plotting.main_plot_vars(trials, bandit, do_show=1)

        idxs, vals = miscs_to_idxs_vals(trials.miscs)
        idxs = idxs['x']
        vals = vals['x']
        print "VALS", vals

        losses = trials.losses()

        from hyperopt.tpe import ap_filter_trials
        from hyperopt.tpe import adaptive_parzen_samplers

        qu = scope.quniform(1.01, 10, 1)
        fn = adaptive_parzen_samplers['quniform']
        fn_kwargs = dict(size=(4,), rng=np.random)
        s_below = pyll.Literal()
        s_above = pyll.Literal()
        b_args = [s_below, prior_weight] + qu.pos_args
        b_post = fn(*b_args, **fn_kwargs)
        a_args = [s_above, prior_weight] + qu.pos_args
        a_post = fn(*a_args, **fn_kwargs)

        #print b_post
        #print a_post
        fn_lpdf = getattr(scope, a_post.name + '_lpdf')
        print fn_lpdf
        # calculate the llik of b_post under both distributions
        a_kwargs = dict([(n, a) for n, a in a_post.named_args
                    if n not in ('rng', 'size')])
        b_kwargs = dict([(n, a) for n, a in b_post.named_args
                    if n not in ('rng', 'size')])
        below_llik = fn_lpdf(*([b_post] + b_post.pos_args), **b_kwargs)
        above_llik = fn_lpdf(*([b_post] + a_post.pos_args), **a_kwargs)
        new_node = scope.broadcast_best(b_post, below_llik, above_llik)

        print '=' * 80

        do_show = self.show_steps

        import matplotlib.pyplot as plt
        for ii in range(2, 9):
            if ii > len(idxs):
                break
            print '-' * 80
            print 'ROUND', ii
            print '-' * 80
            all_vals = [2, 3, 4, 5, 6, 7, 8, 9, 10]
            below, above = ap_filter_trials(idxs[:ii],
                    vals[:ii], idxs[:ii], losses[:ii], gamma)
            below = below.astype('int')
            above = above.astype('int')
            print 'BB0', below
            print 'BB1', above
            #print 'BELOW',  zip(range(100), np.bincount(below, minlength=11))
            #print 'ABOVE',  zip(range(100), np.bincount(above, minlength=11))
            memo = {b_post: all_vals, s_below: below, s_above: above}
            bl, al, nv = pyll.rec_eval([below_llik, above_llik, new_node],
                    memo=memo)
            #print bl - al
            print 'BB2', dict(zip(all_vals, bl - al))
            print 'BB3', dict(zip(all_vals, bl))
            print 'BB4', dict(zip(all_vals, al))
            print 'ORIG PICKED', vals[ii]
            print 'PROPER OPT PICKS:', nv

            #assert np.allclose(below, [3, 3, 9])
            #assert len(below) + len(above) == len(vals)

            if do_show:
                plt.subplot(8, 1, ii)
                #plt.scatter(all_vals,
                #    np.bincount(below, minlength=11)[2:], c='b')
                #plt.scatter(all_vals,
                #    np.bincount(above, minlength=11)[2:], c='c')
                plt.scatter(all_vals, bl, c='g')
                plt.scatter(all_vals, al, c='r')
        if do_show:
            plt.show()
Example #16
0
    #   ...(other options not yet implemented)...
    "search_strategy": "grid",
    # `grid_search_space` specifies values to be searched for each hyperparameter
    #   each entry needs to follow the format {"param_name" : List(Any)}
    "grid_search_space": {
        "layer1_nodes": [16, 32, 64],
        "layer2_nodes": [16, 32, 64],
        "optimizer": ["adam", "sgd"],
    },
    # `grid_search_settings` contain other settings for grid search strategy
    #   `save_every_n_outputs`: how often should the trial results be saved.
    #       The more often we save the results, the less likely we lose data in
    #       the event of a crash, but it takes more time.
    #   `num_samples`: how many repeated trials to run for each point in search space
    #       [Not yet implemented]
    "grid_search_settings": {
        "save_every_n_outputs": 1,
        "num_samples": 1
    }
}

# trainer must be a function that takes an hpset as input and returns (hpset,
# metric, logs) as output
#   `hpset`: hyperparameter values, see hyperopt.Experiment._generate_hpsets
#   `metric`: objective for maximization, evaluated at the point specified by `hpset`
#   `logs`: any other useful information that should be saved

# Build the experiment from the trainer function and the `config` settings,
# then run the search and report on it.
# NOTE(review): what `search()` and `summary()` do exactly is inferred from
# their names only -- confirm against the Experiment class.
exper = Experiment(trainer, config)
exper.search()
exper.summary()
Example #17
0
    def work(self, **kwargs):
        self.__dict__.update(kwargs)
        bandit = opt_q_uniform(self.target)
        prior_weight = 2.5
        gamma = 0.20
        algo = TreeParzenEstimator(bandit,
                                   prior_weight=prior_weight,
                                   n_startup_jobs=2,
                                   n_EI_candidates=128,
                                   gamma=gamma)
        print algo.opt_idxs['x']
        print algo.opt_vals['x']

        trials = Trials()
        experiment = Experiment(trials, algo)
        experiment.run(self.LEN)
        if self.show_vars:
            import hyperopt.plotting
            hyperopt.plotting.main_plot_vars(trials, bandit, do_show=1)

        idxs, vals = miscs_to_idxs_vals(trials.miscs)
        idxs = idxs['x']
        vals = vals['x']
        print "VALS", vals

        losses = trials.losses()

        from hyperopt.tpe import ap_filter_trials
        from hyperopt.tpe import adaptive_parzen_samplers

        qu = scope.quniform(1.01, 10, 1)
        fn = adaptive_parzen_samplers['quniform']
        fn_kwargs = dict(size=(4, ), rng=np.random)
        s_below = pyll.Literal()
        s_above = pyll.Literal()
        b_args = [s_below, prior_weight] + qu.pos_args
        b_post = fn(*b_args, **fn_kwargs)
        a_args = [s_above, prior_weight] + qu.pos_args
        a_post = fn(*a_args, **fn_kwargs)

        #print b_post
        #print a_post
        fn_lpdf = getattr(scope, a_post.name + '_lpdf')
        print fn_lpdf
        # calculate the llik of b_post under both distributions
        a_kwargs = dict([(n, a) for n, a in a_post.named_args
                         if n not in ('rng', 'size')])
        b_kwargs = dict([(n, a) for n, a in b_post.named_args
                         if n not in ('rng', 'size')])
        below_llik = fn_lpdf(*([b_post] + b_post.pos_args), **b_kwargs)
        above_llik = fn_lpdf(*([b_post] + a_post.pos_args), **a_kwargs)
        new_node = scope.broadcast_best(b_post, below_llik, above_llik)

        print '=' * 80

        do_show = self.show_steps

        import matplotlib.pyplot as plt
        for ii in range(2, 9):
            if ii > len(idxs):
                break
            print '-' * 80
            print 'ROUND', ii
            print '-' * 80
            all_vals = [2, 3, 4, 5, 6, 7, 8, 9, 10]
            below, above = ap_filter_trials(idxs[:ii], vals[:ii], idxs[:ii],
                                            losses[:ii], gamma)
            below = below.astype('int')
            above = above.astype('int')
            print 'BB0', below
            print 'BB1', above
            #print 'BELOW',  zip(range(100), np.bincount(below, minlength=11))
            #print 'ABOVE',  zip(range(100), np.bincount(above, minlength=11))
            memo = {b_post: all_vals, s_below: below, s_above: above}
            bl, al, nv = pyll.rec_eval([below_llik, above_llik, new_node],
                                       memo=memo)
            #print bl - al
            print 'BB2', dict(zip(all_vals, bl - al))
            print 'BB3', dict(zip(all_vals, bl))
            print 'BB4', dict(zip(all_vals, al))
            print 'ORIG PICKED', vals[ii]
            print 'PROPER OPT PICKS:', nv

            #assert np.allclose(below, [3, 3, 9])
            #assert len(below) + len(above) == len(vals)

            if do_show:
                plt.subplot(8, 1, ii)
                #plt.scatter(all_vals,
                #    np.bincount(below, minlength=11)[2:], c='b')
                #plt.scatter(all_vals,
                #    np.bincount(above, minlength=11)[2:], c='c')
                plt.scatter(all_vals, bl, c='g')
                plt.scatter(all_vals, al, c='r')
        if do_show:
            plt.show()