def test_clone():
    """Check that ``clone`` yields a disjoint graph that evaluates identically.

    The cloned expression must share no node with the original (as listed
    by ``dfs``), and both must evaluate to equal results when each is given
    an rng built from the same seed.
    """
    config = config0()
    config2 = clone(config)

    # -- no node of the clone may appear in the original graph
    nodeset = set(dfs(config))
    assert not any(n in nodeset for n in dfs(config2))

    # -- print() calls replace the Python-2-only print statements
    foo = recursive_set_rng_kwarg(config, scope.rng_from_seed(5))
    r = rec_eval(foo)
    print(r)
    r2 = rec_eval(recursive_set_rng_kwarg(config2, scope.rng_from_seed(5)))
    print(r2)
    assert r == r2
def get_random_idxs_vals(new_id, domain, rng):
    """Evaluate the domain's ``s_idxs_vals`` expression for a single id.

    ``new_id`` is wrapped in a one-element list and bound to
    ``domain.s_new_ids``; ``rng`` is bound to ``domain.s_rng``.  The result
    of ``pyll.rec_eval`` is returned unchanged.
    """
    bindings = {domain.s_new_ids: [new_id], domain.s_rng: rng}
    return pyll.rec_eval(domain.s_idxs_vals, memo=bindings)
def test_vectorize_simple():
    """Vectorize ``p0 ** 2`` over 15 indices and check every loss.

    Each evaluated loss must equal the square of the ``p0`` value recorded
    for the same index.
    """
    N = as_apply(15)

    p0 = hp_uniform('p0', 0, 1)
    loss = p0 ** 2
    # -- print() calls replace the Python-2-only print statements
    print(loss)
    expr_idxs = scope.range(N)
    vh = VectorizeHelper(loss, expr_idxs, build=True)
    vloss = vh.v_expr

    full_output = as_apply([vloss, vh.idxs_by_label(), vh.vals_by_label()])
    fo2 = replace_repeat_stochastic(full_output)

    new_vc = recursive_set_rng_kwarg(fo2, as_apply(np.random.RandomState(1)))

    losses, idxs, vals = rec_eval(new_vc)
    print('losses', losses)
    print('idxs p0', idxs['p0'])
    print('vals p0', vals['p0'])

    # -- map each index to the p0 value that was drawn for it
    p0dct = dict(zip(idxs['p0'], vals['p0']))
    for ii, li in enumerate(losses):
        assert p0dct[ii] ** 2 == li
def work(self):
    """Run 10 random trials, then evaluate the TPE ``post_below`` expressions.

    The observed idxs/vals and losses of the random trials are bound to the
    ``TreeParzenEstimator`` placeholders through the ``rec_eval`` memo, and
    the evaluated result is printed.
    """
    bandit = self.bandit
    random_algo = Random(bandit)

    # build an experiment of 10 trials
    trials = Trials()
    exp = Experiment(trials, random_algo)
    exp.run(10)
    ids = trials.tids
    assert len(ids) == 10

    tpe_algo = TreeParzenEstimator(bandit)

    argmemo = {}
    # -- print() calls replace the Python-2-only print statements
    print(trials.miscs)
    idxs, vals = miscs_to_idxs_vals(trials.miscs)
    argmemo[tpe_algo.observed['idxs']] = idxs
    argmemo[tpe_algo.observed['vals']] = vals
    argmemo[tpe_algo.observed_loss['idxs']] = trials.tids
    argmemo[tpe_algo.observed_loss['vals']] = trials.losses()

    stuff = pyll.rec_eval(
        [tpe_algo.post_below['idxs'], tpe_algo.post_below['vals']],
        memo=argmemo)
    print(stuff)
def test_pyll_list_tuple_nested():
    """Nested list/tuple literals must evaluate alike in both back ends."""
    nested = as_partialplus([[5, 3, (5, 3)], (4, 5)])
    converted = as_pyll(nested)
    # rec_eval always uses tuple
    pyll_value = rec_eval(converted)
    # Correct for tuple-only in rec_eval: rebuild the outer list and the
    # first inner list before comparing.
    expected_shape = [list(pyll_value[0]), pyll_value[1]]
    assert evaluate(nested) == expected_shape
def test_vectorize_simple():
    """Vectorized ``p0 ** 2`` must match the squared sample at every index."""
    n_samples = as_apply(15)

    param = hp_uniform('p0', 0, 1)
    loss = param ** 2
    print(loss)

    helper = VectorizeHelper(loss, scope.range(n_samples), build=True)
    bundle = as_apply(
        [helper.v_expr, helper.idxs_by_label(), helper.vals_by_label()])

    # -- swap repeated stochastic nodes, then seed every sampler
    seeded = recursive_set_rng_kwarg(
        replace_repeat_stochastic(bundle),
        as_apply(np.random.RandomState(1)))

    losses, idxs, vals = rec_eval(seeded)
    print('losses', losses)
    print('idxs p0', idxs['p0'])
    print('vals p0', vals['p0'])

    sample_by_idx = dict(zip(idxs['p0'], vals['p0']))
    for position, observed in enumerate(losses):
        assert sample_by_idx[position] ** 2 == observed
def work(self):
    """Feed 10 random trials to TPE and print its evaluated ``post_below``.

    Ten trials are produced with the ``Random`` algorithm; their idxs/vals
    and losses are then substituted for the ``TreeParzenEstimator``
    observation placeholders when evaluating ``post_below``.
    """
    bandit = self.bandit

    # build an experiment of 10 trials
    trials = Trials()
    Experiment(trials, Random(bandit)).run(10)
    assert len(trials.tids) == 10

    tpe_algo = TreeParzenEstimator(bandit)

    # -- print() calls replace the Python-2-only print statements
    print(trials.miscs)
    idxs, vals = miscs_to_idxs_vals(trials.miscs)
    # -- bind the observation placeholders to the data gathered above
    argmemo = {
        tpe_algo.observed['idxs']: idxs,
        tpe_algo.observed['vals']: vals,
        tpe_algo.observed_loss['idxs']: trials.tids,
        tpe_algo.observed_loss['vals']: trials.losses(),
    }

    targets = [tpe_algo.post_below['idxs'], tpe_algo.post_below['vals']]
    print(pyll.rec_eval(targets, memo=argmemo))
def test_vectorize_multipath():
    """Vectorize a choice between ``1``, ``p0`` and ``-p0`` (squared).

    Wherever the recorded ``p1`` value is non-zero the loss must be
    ``p0 ** 2``; otherwise it must be ``1``.
    """
    n_samples = as_apply(15)

    p0 = hp_uniform("p0", 0, 1)
    loss = hp_choice("p1", [1, p0, -p0]) ** 2
    helper = VectorizeHelper(loss, scope.range(n_samples), build=True)
    vectorized = helper.v_expr
    print(vectorized)

    bundle = as_apply(
        [vectorized, helper.idxs_by_label(), helper.vals_by_label()])
    seeded = recursive_set_rng_kwarg(
        bundle, as_apply(np.random.RandomState(1)))

    losses, idxs, vals = rec_eval(seeded)
    print("losses", losses)
    for label in ("p0", "p1"):
        print("idxs " + label, idxs[label])
        print("vals " + label, vals[label])

    p0_by_idx = dict(zip(idxs["p0"], vals["p0"]))
    p1_by_idx = dict(zip(idxs["p1"], vals["p1"]))
    for position, observed in enumerate(losses):
        print(position, observed)
        if p1_by_idx[position] != 0:
            assert observed == p0_by_idx[position] ** 2
        else:
            assert observed == 1
def test_vectorize_multipath():
    """Vectorize ``hp_choice('p1', [1, p0, -p0]) ** 2`` and check each loss.

    For indices whose recorded ``p1`` value is non-zero the loss must equal
    the square of the ``p0`` sample; for the rest it must equal ``1``.
    """
    N = as_apply(15)

    p0 = hp_uniform('p0', 0, 1)
    loss = hp_choice('p1', [1, p0, -p0]) ** 2
    expr_idxs = scope.range(N)
    vh = VectorizeHelper(loss, expr_idxs, build=True)

    vloss = vh.v_expr
    # -- print() calls replace the Python-2-only print statements
    print(vloss)

    full_output = as_apply([vloss, vh.idxs_by_label(), vh.vals_by_label()])

    new_vc = recursive_set_rng_kwarg(
        full_output,
        as_apply(np.random.RandomState(1)),
    )

    losses, idxs, vals = rec_eval(new_vc)
    print('losses', losses)
    print('idxs p0', idxs['p0'])
    print('vals p0', vals['p0'])
    print('idxs p1', idxs['p1'])
    print('vals p1', vals['p1'])

    p0dct = dict(zip(idxs['p0'], vals['p0']))
    p1dct = dict(zip(idxs['p1'], vals['p1']))
    for ii, li in enumerate(losses):
        print(ii, li)
        if p1dct[ii] != 0:
            assert li == p0dct[ii] ** 2
        else:
            assert li == 1
def test_vectorize_multipath():
    """Check the vectorized loss of a three-way choice involving ``p0``.

    The loss is ``hp_choice('p1', [1, p0, -p0]) ** 2``: it must be ``1``
    where the recorded ``p1`` value is zero and ``p0 ** 2`` elsewhere.
    """
    count = as_apply(15)

    p0 = hp_uniform('p0', 0, 1)
    squared_choice = hp_choice('p1', [1, p0, -p0]) ** 2
    index_expr = scope.range(count)
    vectorizer = VectorizeHelper(squared_choice, index_expr, build=True)

    v_loss = vectorizer.v_expr
    print(v_loss)

    outputs = as_apply(
        [v_loss, vectorizer.idxs_by_label(), vectorizer.vals_by_label()])
    rng_node = as_apply(np.random.RandomState(1))
    with_rng = recursive_set_rng_kwarg(outputs, rng_node)

    losses, idxs, vals = rec_eval(with_rng)
    print('losses', losses)
    print('idxs p0', idxs['p0'])
    print('vals p0', vals['p0'])
    print('idxs p1', idxs['p1'])
    print('vals p1', vals['p1'])

    drawn_p0 = dict(zip(idxs['p0'], vals['p0']))
    drawn_p1 = dict(zip(idxs['p1'], vals['p1']))
    for slot, loss_value in enumerate(losses):
        print(slot, loss_value)
        expected = drawn_p0[slot] ** 2 if drawn_p1[slot] != 0 else 1
        assert loss_value == expected
def draw_n_feature_vecs(self, N, rng):
    """Sample ``N`` configurations and convert them to feature vectors.

    ``range(N)`` serves as placeholder trial ids; the domain's
    ``s_idxs_vals`` expression is evaluated with those ids and ``rng``
    bound in the memo, and the resulting idxs/vals are handed to
    ``self.features_from_idxs_vals``.
    """
    placeholder_ids = range(N)
    domain = self.domain
    bindings = {domain.s_new_ids: placeholder_ids, domain.s_rng: rng}
    idxs, vals = pyll.rec_eval(domain.s_idxs_vals, memo=bindings)
    return self.features_from_idxs_vals(placeholder_ids, idxs, vals)
def __call__(self, expr, memo, ctrl):
    """Evaluate ``expr`` and pass it, plus the trial's loss, to the evaluator.

    The ``'loss'`` entry is taken from the first element of
    ``ctrl.current_trial['misc']['vals']['loss']`` when that key exists and
    is ``None`` otherwise.  Returns ``self.model_evaluator(...)``'s result.
    """
    evaluated = pyll.rec_eval(expr, memo=memo, print_node_on_error=False)
    trial_vals = ctrl.current_trial['misc']['vals']
    recorded_loss = trial_vals['loss'][0] if 'loss' in trial_vals else None
    evaluated.update({'loss': recorded_loss})
    return self.model_evaluator(evaluated)
def test_dict():
    """A dict of nested partials must evaluate alike in both back ends."""
    spec = {
        'x': partial(float, partial(float, partial(int, 3.3))) / 2,
        'y': partial(float, 3),
    }
    node = as_partialplus(spec)
    pyll_graph = as_pyll(node)
    assert evaluate(node) == rec_eval(pyll_graph)
def test_recursive_set_rng_kwarg():
    """After setting the rng, both uniform draws must fall in their ranges.

    The first draw comes from ``uniform(0, 1)`` and the second from
    ``uniform(2, 3)``; each is checked against its open interval.
    """
    uniform = scope.uniform
    a = as_apply([uniform(0, 1), uniform(2, 3)])
    rng = np.random.RandomState(234)
    recursive_set_rng_kwarg(a, rng)
    # -- print() replaces the Python-2-only print statement
    print(a)
    val_a = rec_eval(a)
    assert 0 < val_a[0] < 1
    assert 2 < val_a[1] < 3
def draw_n_feature_vecs(self, N, rng):
    """Draw ``N`` samples from the domain and return their feature vectors.

    The ids ``range(N)`` stand in for trial ids while evaluating
    ``self.domain.s_idxs_vals``; the sampled idxs/vals are converted by
    ``self.features_from_idxs_vals``.
    """
    fake_ids = range(N)
    sampled = pyll.rec_eval(
        self.domain.s_idxs_vals,
        memo={self.domain.s_new_ids: fake_ids, self.domain.s_rng: rng})
    # -- rec_eval yields an (idxs, vals) pair
    idxs, vals = sampled
    return self.features_from_idxs_vals(fake_ids, idxs, vals)
def test_clone():
    """A clone shares no node with its source yet evaluates to the same value."""
    original = config0()
    duplicate = clone(original)

    # -- every node reachable from the clone must be new
    original_nodes = set(dfs(original))
    assert all(node not in original_nodes for node in dfs(duplicate))

    seeded_original = recursive_set_rng_kwarg(
        original, scope.rng_from_seed(5))
    first = rec_eval(seeded_original)
    print(first)

    seeded_duplicate = recursive_set_rng_kwarg(
        duplicate, scope.rng_from_seed(5))
    second = rec_eval(seeded_duplicate)
    print(second)

    assert first == second
def exhaustive_search(new_ids, domain, trials, seed, nbMaxSucessiveFailures=1000):
    """Suggest, for each new id, a random configuration not tried before.

    This is for exhaustive search in HyperTuning.

    Configurations are identified by the hash of their ``(label, first
    value)`` pairs.  A sample whose signature is already known -- from
    ``trials`` or from an id handled earlier in this same call -- is
    rejected and redrawn.

    :param new_ids: ids of the trials to create.
    :param domain: object providing ``s_idxs_vals``, ``s_new_ids``,
        ``s_rng``, ``new_result()``, ``cmd`` and ``workdir``.
    :param trials: existing trials; read through ``trials.trials`` and used
        to build the documents with ``trials.new_trial_docs``.
    :param seed: seed for the ``np.random.RandomState`` used for sampling.
    :param nbMaxSucessiveFailures: number of consecutive duplicate draws
        tolerated for one id before giving up.
    :return: the list of new trial documents, or ``[]`` as soon as one id
        exceeds ``nbMaxSucessiveFailures`` duplicates (documents already
        built in this call are discarded, as before).
    """
    from hyperopt import pyll
    from hyperopt.base import miscs_update_idxs_vals

    def _signature(vals):
        # Hash of the (label, first value) pairs; empty value lists map to None.
        return hash(frozenset(
            (key, value[0]) if len(value) > 0 else (key, None)
            for key, value in vals.items()))

    # Build a hash set for previous trials
    hashset = {_signature(trial['misc']['vals']) for trial in trials.trials}

    rng = np.random.RandomState(seed)
    rval = []
    for new_id in new_ids:
        nbSucessiveFailures = 0
        while True:
            # -- sample new specs, idxs, vals
            idxs, vals = pyll.rec_eval(
                domain.s_idxs_vals,
                memo={
                    domain.s_new_ids: [new_id],
                    domain.s_rng: rng,
                })
            # Compare with previous hashes
            h = _signature(vals)
            if h not in hashset:
                break
            # Duplicated sample, ignore
            nbSucessiveFailures += 1
            if nbSucessiveFailures > nbMaxSucessiveFailures:
                # No more samples to produce
                return []

        # Remember the accepted sample so that later ids of this call cannot
        # receive the same configuration (previously it was never recorded).
        hashset.add(h)

        # The result/misc documents are only needed for the accepted sample.
        new_result = domain.new_result()
        new_misc = dict(tid=new_id, cmd=domain.cmd, workdir=domain.workdir)
        miscs_update_idxs_vals([new_misc], idxs, vals)
        rval.extend(
            trials.new_trial_docs([new_id], [None], [new_result], [new_misc]))
    return rval
def test_pyll_deeply_nested_func():
    """Deeply nested partials using a locally defined function must agree.

    The comparison is now asserted; previously its result was computed and
    thrown away, so the test could never fail on a mismatch.
    """
    # N.B. uses stuff that isn't in the SymbolTable yet, must remove.
    try:
        def my_add(x, y):
            return x + y

        x = as_partialplus(
            (partial(float, partial(my_add, 0, partial(int, 3.3))) / 2,
             partial(float, 3))
        )
        y = as_pyll(x)
        assert evaluate(x) == rec_eval(y)
    finally:
        # -- always unregister the helper, whatever the outcome
        scope.undefine(my_add)
def exception_thrower():
    """Evaluate the expression and drive a ``PrimalVisitor`` through the protocol.

    Steps: evaluate ``expr`` into an argument dict, run the validation step
    of the data view's protocol, checkpoint the intermediate result, and run
    the testing step only when the model is assumed or judged promising.

    Relies on names from the enclosing scope (``expr``, ``memo``,
    ``loss_fn``, ``true_loss_fn``, ``assume_promising``,
    ``memmap_name_template``, ``logger``).

    :return: ``(visitor, rdict)``.
    """
    argdict = pyll.rec_eval(expr, memo=memo, print_node_on_error=False)
    visitor = PrimalVisitor(
        pipeline=argdict['pipeline'],
        ctrl=argdict['ctrl'],
        data_view=argdict['data_view'],
        max_n_features=argdict['max_n_features'],
        # TODO: just pass memmap_name directly
        memmap_name=memmap_name_template % (
            os.getpid(), np.random.randint(10000)),
        thresh_rank=1,
        optimize_l2_reg=True,
        batched_lmap_speed_thresh=argdict['batched_lmap_speed_thresh'],
        badfit_thresh=None,
        batchsize=argdict['batchsize'],
    )

    protocol_iter = argdict['data_view'].protocol_iter(visitor)
    # -- next(it) works on Python 2 and 3, unlike the Python-2-only it.next()
    msg, model = next(protocol_iter)
    assert msg == 'model validation complete'

    # -- save the loss, but don't save attachments yet.
    rdict = visitor.hyperopt_rval()
    rdict['loss'] = loss_fn(visitor, argdict['bagging_fraction'])
    rdict['in_progress'] = True
    rdict['status'] = hyperopt.STATUS_OK
    argdict['ctrl'].checkpoint(rdict)

    if assume_promising:
        promising = True
    else:
        promising = view2_worth_calculating(
            loss=rdict['loss'],
            ctrl=argdict['ctrl'],
            thresh_loss=1.0,
            thresh_rank=1)

    logger.info('Promising: %s', promising)
    if promising:
        # -- the model returned by the testing step is not used here
        msg, _ = next(protocol_iter)
        assert msg == 'model testing complete'
        rdict = visitor.hyperopt_rval()
        rdict['loss'] = loss_fn(visitor, argdict['bagging_fraction'])
        rdict['true_loss'] = true_loss_fn(visitor)
        visitor.attach_obj_results()
    else:
        logger.warning('Not testing unpromising model %s', model)

    # -- the marker only belongs to the checkpointed intermediate result;
    #    pop() tolerates a freshly built rdict that never carried it
    rdict.pop('in_progress', None)
    return visitor, rdict
def foo():
    """Evaluate the expression and drive an ``ESVC_SLM_Visitor`` through the protocol.

    Steps: evaluate ``expr`` into an argument dict, run the validation step,
    checkpoint an in-progress result, optionally run the "view2" testing
    step, then rebuild the result dict and let ``loss_fn`` fill it in.

    Relies on names from the enclosing scope (``expr``, ``memo``,
    ``loss_fn``, ``assume_promising``, ``maybe_test_view2``,
    ``comparison_names``, ``info``, ``warn``, ``foobar``).

    :return: ``(visitor, rdict)``.
    """
    argdict = pyll.rec_eval(expr, memo=memo, print_node_on_error=False)
    visitor = ESVC_SLM_Visitor(
        pipeline=argdict['pipeline'],
        ctrl=argdict['ctrl'],
        data_view=argdict['data_view'],
        max_n_features=argdict['max_n_features'],
        memmap_name='%s_%i' % (__name__, os.getpid()),
        svm_crossvalid_max_evals=50,
        optimize_l2_reg=True,
        batched_lmap_speed_thresh=argdict['batched_lmap_speed_thresh'],
        comparison_names=comparison_names,
        batchsize=argdict['batchsize'],
    )
    # -- drive the visitor according to the protocol of the data set
    protocol_iter = argdict['data_view'].protocol_iter(visitor)
    # -- next(it) works on Python 2 and 3, unlike the Python-2-only it.next()
    msg, model = next(protocol_iter)
    assert msg == 'model validation complete'

    # -- save the loss, but don't save attachments yet.
    rdict = visitor.hyperopt_rval(save_grams=False)
    rdict['in_progress'] = True
    loss_fn(visitor, rdict, argdict['bagging_fraction'])
    argdict['ctrl'].checkpoint(rdict)

    if assume_promising:
        promising = True
    else:
        promising = view2_worth_calculating(
            loss=rdict['loss'],
            ctrl=argdict['ctrl'],
            thresh_loss=1.0,
            thresh_rank=1)
    info('Promising: %s' % promising)

    if maybe_test_view2:
        if promising:
            info('Disabling trace verification for view2')
            foobar.trace_verify = False
            # NOTE(review): the first protocol step is unpacked as
            # (msg, model) while this one is compared whole -- confirm the
            # testing step really yields a bare message.
            msg = next(protocol_iter)
            assert msg == 'model testing complete'
        else:
            warn('Not testing unpromising model %s' % str(model))
    else:
        warn('Skipping view2 stuff for model %s' % str(model))

    rdict = visitor.hyperopt_rval(save_grams=promising)
    loss_fn(visitor, rdict, argdict['bagging_fraction'])
    return visitor, rdict
def test_uniform_categorical():
    """A variable with a list ``value_type`` converts to ``getitem`` over ``randint``.

    The graph structure is checked node by node, then ten evaluations must
    all fall inside the declared value set.
    """
    p = as_pyll(variable('foo', value_type=[-1, 1, 4]))
    assert p.name == 'getitem'
    assert p.pos_args[0].name == 'pos_args'
    assert p.pos_args[1].name == 'hyperopt_param'
    assert p.pos_args[1].pos_args[0].name == 'literal'
    assert p.pos_args[1].pos_args[0].obj == 'foo'
    assert p.pos_args[1].pos_args[1].name == 'randint'
    # Make sure this executes and yields a value in the right domain.
    recursive_set_rng_kwarg(p, np.random)
    # Any exception raised here fails the test with its real traceback
    # (it used to be replaced by a bare ``assert False``); range() replaces
    # the Python-2-only xrange().
    values = [rec_eval(p) for _ in range(10)]
    assert all(v in [-1, 1, 4] for v in values)
def exception_thrower():
    """Run validation, checkpoint, and (if promising) testing with a ``PrimalVisitor``.

    ``expr`` is evaluated into an argument dict that configures the visitor.
    The data view's protocol iterator is advanced once for validation; the
    partial result is checkpointed through ``ctrl``; the iterator is
    advanced a second time only when the model is assumed or judged
    promising.

    Relies on names from the enclosing scope (``expr``, ``memo``,
    ``loss_fn``, ``true_loss_fn``, ``assume_promising``,
    ``memmap_name_template``, ``logger``).

    :return: ``(visitor, rdict)``.
    """
    argdict = pyll.rec_eval(expr, memo=memo, print_node_on_error=False)
    ctrl = argdict['ctrl']
    data_view = argdict['data_view']
    bagging_fraction = argdict['bagging_fraction']

    # TODO: just pass memmap_name directly
    memmap_name = memmap_name_template % (
        os.getpid(), np.random.randint(10000))
    visitor = PrimalVisitor(
        pipeline=argdict['pipeline'],
        ctrl=ctrl,
        data_view=data_view,
        max_n_features=argdict['max_n_features'],
        memmap_name=memmap_name,
        thresh_rank=1,
        optimize_l2_reg=True,
        batched_lmap_speed_thresh=argdict['batched_lmap_speed_thresh'],
        badfit_thresh=None,
        batchsize=argdict['batchsize'],
    )

    protocol_iter = data_view.protocol_iter(visitor)
    # -- next(it) works on Python 2 and 3, unlike the Python-2-only it.next()
    msg, model = next(protocol_iter)
    assert msg == 'model validation complete'

    # -- save the loss, but don't save attachments yet.
    rdict = visitor.hyperopt_rval()
    rdict['loss'] = loss_fn(visitor, bagging_fraction)
    rdict['in_progress'] = True
    rdict['status'] = hyperopt.STATUS_OK
    ctrl.checkpoint(rdict)

    promising = True if assume_promising else view2_worth_calculating(
        loss=rdict['loss'],
        ctrl=ctrl,
        thresh_loss=1.0,
        thresh_rank=1)
    logger.info('Promising: %s', promising)

    if promising:
        # -- the model returned by the testing step is not used here
        msg, _ = next(protocol_iter)
        assert msg == 'model testing complete'
        rdict = visitor.hyperopt_rval()
        rdict['loss'] = loss_fn(visitor, bagging_fraction)
        rdict['true_loss'] = true_loss_fn(visitor)
        visitor.attach_obj_results()
    else:
        logger.warning('Not testing unpromising model %s', model)

    # -- the marker only belongs to the checkpointed intermediate result;
    #    pop() tolerates a freshly built rdict that never carried it
    rdict.pop('in_progress', None)
    return visitor, rdict
def test_uniform_choice():
    """A ``choice`` over a uniform variable converts to ``switch`` over ``randint``.

    The graph structure is checked, then ten evaluations must all return one
    of the three branch values.
    """
    p = as_pyll(choice(variable('foo', value_type=[7, 9, 11]),
                       (7, 'rst'), (9, 'uvw'), (11, 'xyz')))
    assert p.name == 'switch'
    assert p.pos_args[0].name == 'hyperopt_param'
    assert p.pos_args[0].pos_args[0].obj == 'foo'
    assert p.pos_args[0].pos_args[1].name == 'randint'
    assert p.pos_args[0].pos_args[1].arg['upper'].obj == 3
    # Make sure this executes and yields a value in the right domain.
    recursive_set_rng_kwarg(p, np.random)
    # Any exception raised here fails the test with its real traceback
    # (it used to be replaced by a bare ``assert False``); range() replaces
    # the Python-2-only xrange().
    values = [rec_eval(p) for _ in range(10)]
    assert all(v in ['rst', 'uvw', 'xyz'] for v in values)
def best_model_vector_classification(self, train, valid):
    """Evaluate ``self.expr`` on the given tasks and record the outcome.

    ``train``, ``valid`` and ``self.ctrl`` are substituted for their stub
    literals in a copy of ``self.memo``; the expression is expected to
    evaluate to a ``(model, report)`` pair.  An entry describing the run is
    appended to ``self.results['best_model']``.

    :return: the evaluated model.
    """
    # TODO: use validation set if not-None
    memo = dict(self.memo)
    substitutions = (
        (train, pyll_stubs.train_task),
        (valid, pyll_stubs.valid_task),
        (self.ctrl, pyll_stubs.ctrl),
    )
    for obj, stub in substitutions:
        use_obj_for_literal_in_memo(self.expr, obj, stub, memo)

    model, report = rec_eval(self.expr, memo=memo)
    if model:
        model.trained_on = train.name

    # -- remember each validation set name once
    if valid and valid.name not in self.validation_sets:
        self.validation_sets.append(valid.name)

    entry = {
        'train_name': train.name,
        'valid_name': valid.name if valid else None,
        'model': model,
        'report': report,
    }
    self.results['best_model'].append(entry)
    return model
def test_nonuniform_categorical():
    """A categorical variable with explicit ``p`` converts to ``getitem`` over ``categorical``.

    The graph structure, including the three probabilities, is checked, then
    ten evaluations must all fall inside the declared value set.
    """
    p = as_pyll(variable('baz', value_type=[3, 5, 9],
                         distribution='categorical',
                         p=[0.1, 0.4, 0.5]))
    assert p.name == 'getitem'
    assert p.pos_args[0].name == 'pos_args'
    assert p.pos_args[1].name == 'hyperopt_param'
    assert p.pos_args[1].pos_args[0].name == 'literal'
    assert p.pos_args[1].pos_args[0].obj == 'baz'
    assert p.pos_args[1].pos_args[1].name == 'categorical'
    assert p.pos_args[1].pos_args[1].arg['p'].name == 'pos_args'
    assert p.pos_args[1].pos_args[1].arg['p'].pos_args[0].obj == 0.1
    assert p.pos_args[1].pos_args[1].arg['p'].pos_args[1].obj == 0.4
    assert p.pos_args[1].pos_args[1].arg['p'].pos_args[2].obj == 0.5
    # Make sure this executes and yields a value in the right domain.
    recursive_set_rng_kwarg(p, np.random)
    # Any exception raised here fails the test with its real traceback
    # (it used to be replaced by a bare ``assert False``); range() replaces
    # the Python-2-only xrange().
    values = [rec_eval(p) for _ in range(10)]
    assert all(v in [3, 5, 9] for v in values)
def evaluate(self, config, ctrl, attach_attachments=True):
    """Evaluate ``config`` and normalise the outcome into a result dict.

    :param config: configuration turned into a memo by
        ``self.memo_from_config``.
    :param ctrl: control object; substituted for the ``Ctrl`` literal and,
        when ``attach_attachments`` is true, receives the attachments.
    :param attach_attachments: move the ``'attachments'`` entry of the
        result into ``ctrl.attachments``.
    :return: a dict with at least ``'status'``; a bare number returned by
        ``self.fn`` becomes ``{'loss': float(...), 'status': STATUS_OK}``.
    :raises InvalidResultStatus: the status is not in ``STATUS_STRINGS``.
    :raises InvalidLoss: status is ``STATUS_OK`` but the loss is missing or
        not convertible to ``float``.
    """
    memo = self.memo_from_config(config)
    use_obj_for_literal_in_memo(self.expr, ctrl, Ctrl, memo)

    if not self.pass_expr_memo_ctrl:
        # -- the "work" of evaluating `config` can be written
        #    either into the pyll part (self.expr)
        #    or the normal Python part (self.fn)
        evaluated = pyll.rec_eval(
            self.expr,
            memo=memo,
            print_node_on_error=self.rec_eval_print_node_on_error)
        raw = self.fn(evaluated)
    else:
        raw = self.fn(expr=self.expr, memo=memo, ctrl=ctrl)

    if isinstance(raw, (float, int, np.number)):
        result = {'loss': float(raw), 'status': STATUS_OK}
    else:
        result = dict(raw)
        status = result['status']
        if status not in STATUS_STRINGS:
            raise InvalidResultStatus(result)
        if status == STATUS_OK:
            # -- make sure that the loss is present and valid
            try:
                result['loss'] = float(result['loss'])
            except (TypeError, KeyError):
                raise InvalidLoss(result)

    if attach_attachments:
        for name, payload in result.pop('attachments', {}).items():
            ctrl.attachments[name] = payload

    # -- the result is deliberately not SONified here because
    #    SON-compatibility is only a requirement for trials destined for a
    #    mongodb. In-memory rvals can contain anything.
    return result
def test_nonuniform_choice():
    """A ``choice`` over a categorical variable converts to ``switch`` over ``categorical``.

    The graph structure, including the three probabilities, is checked, then
    ten evaluations must all return one of the three branch values.
    """
    var = variable('blu', value_type=[2, 4, 8],
                   distribution='categorical',
                   p=[0.2, 0.7, 0.1])
    p = as_pyll(choice(var, (2, 'abc'), (4, 'def'), (8, 'ghi')))
    assert p.name == 'switch'
    assert p.pos_args[0].name == 'hyperopt_param'
    assert p.pos_args[0].pos_args[0].obj == 'blu'
    assert p.pos_args[0].pos_args[1].name == 'categorical'
    assert p.pos_args[0].pos_args[1].arg['p'].name == 'pos_args'
    assert p.pos_args[0].pos_args[1].arg['p'].pos_args[0].obj == 0.2
    assert p.pos_args[0].pos_args[1].arg['p'].pos_args[1].obj == 0.7
    assert p.pos_args[0].pos_args[1].arg['p'].pos_args[2].obj == 0.1
    # Make sure this executes and yields a value in the right domain.
    recursive_set_rng_kwarg(p, np.random)
    # Any exception raised here fails the test with its real traceback
    # (it used to be replaced by a bare ``assert False``); range() replaces
    # the Python-2-only xrange().
    values = [rec_eval(p) for _ in range(10)]
    assert all(v in ['abc', 'def', 'ghi'] for v in values)
def test_vectorize_trivial():
    """Vectorizing a bare ``p0`` must return each index's own sample as loss."""
    count = as_apply(15)

    loss = hp_uniform("p0", 0, 1)
    print(loss)

    helper = VectorizeHelper(loss, scope.range(count), build=True)
    bundle = as_apply(
        [helper.v_expr, helper.idxs_by_label(), helper.vals_by_label()])
    deduplicated = replace_repeat_stochastic(bundle)
    seeded = recursive_set_rng_kwarg(
        deduplicated, as_apply(np.random.RandomState(1)))

    losses, idxs, vals = rec_eval(seeded)
    print("losses", losses)
    print("idxs p0", idxs["p0"])
    print("vals p0", vals["p0"])

    drawn = dict(zip(idxs["p0"], vals["p0"]))
    for slot, value in enumerate(losses):
        assert drawn[slot] == value
def exception_thrower():
    """Evaluate one trial: checkpoint, stop check, feature extraction, evaluation.

    ``expr`` is evaluated into an argument dict.  In-memory trials are saved
    every ``checkpoint_every`` trial ids, mongo trials are refreshed, and
    ``SimpleHpStop`` is raised once enough ok trials exist.  Otherwise the
    images are run through the pipeline and the features are handed to the
    dataset's evaluation function.

    Relies on ``expr``, ``memo`` and ``checkpoint_every`` from the enclosing
    scope.

    :return: the evaluation function's dict, extended with ``'status'``,
        ``'feat_shape'``, ``'slm_time'`` and ``'eval_time'``.
    """
    argdict = rec_eval(expr, memo=memo, print_node_on_error=False)

    # -- pull every argument up front, in the original lookup order
    (dataset_info, learning_algo, hp_space, pipeline, n_startup_trials,
     n_ok_trials, batchsize, min_features, max_features, checkpoint_fname,
     batched_lmap_speed_thresh, ctrl) = [
        argdict[key] for key in (
            'dataset_info', 'learning_algo', 'hp_space', 'pipeline',
            'n_startup_trials', 'n_ok_trials', 'batchsize', 'min_features',
            'max_features', 'checkpoint_fname', 'batched_lmap_speed_thresh',
            'ctrl')]

    tid = ctrl.current_trial['tid']

    # -- checkpoint
    if (isinstance(ctrl.trials, Trials)
            and tid > 0 and tid % checkpoint_every == 0):
        save_hp(hp_space, ctrl.trials, n_startup_trials, checkpoint_fname)

    # -- retrieve trials from database
    if isinstance(ctrl.trials, MongoTrials):
        ctrl.trials.refresh()

    # -- check and signal stopping to optimizer
    current_ok_trials = count_ok_trials(ctrl.trials)
    if current_ok_trials >= n_ok_trials:
        stop_msg = ('number of ok trials reached - '
                    'stopping process with %d ok trials out of '
                    '%d trials.' % (current_ok_trials, tid))
        raise SimpleHpStop(stop_msg, ctrl.trials)

    data_obj = dataset_info['data_obj']

    # -- feature extraction
    slm_t0 = time()

    imgs = getattr(data_obj, dataset_info['fn_imgs'])()
    limgs = lmap_ndarray(imgs)

    X = pyll_theano_batched_lmap(
        partial(callpipe1, pipeline),
        limgs,
        batchsize=batchsize,
        print_progress_every=10,
        speed_thresh=batched_lmap_speed_thresh,
        abort_on_rows_larger_than=max_features,
        x_dtype='uint8',
    )[:]

    feat_set = rec_eval(X, print_node_on_error=False)
    slm_time = time() - slm_t0

    # -- classification
    eval_t0 = time()

    # -- feat_set in 2-D (reshaped in place, original shape kept for the report)
    feat_shape = feat_set.shape
    feat_set.shape = feat_set.shape[0], -1

    assert feat_set.shape[1] >= min_features, 'min_features not satisfied'

    r_dict = getattr(data_obj, dataset_info['fn_eval'])(learning_algo, feat_set)
    eval_time = time() - eval_t0

    r_dict['status'] = hyperopt.STATUS_OK
    r_dict['feat_shape'] = feat_shape
    r_dict['slm_time'] = slm_time
    r_dict['eval_time'] = eval_time

    return r_dict
def exception_thrower():
    """Run the per-trial work: bookkeeping, feature extraction and evaluation.

    After evaluating ``expr`` into an argument dict, the function saves a
    checkpoint for in-memory trials when the trial id is a positive multiple
    of ``checkpoint_every``, refreshes mongo trials, and raises
    ``SimpleHpStop`` when the number of ok trials has been reached.  It then
    extracts features from the dataset images and evaluates them.

    Relies on ``expr``, ``memo`` and ``checkpoint_every`` from the enclosing
    scope.

    :return: the result dict of the dataset's evaluation function with
        status, feature shape and timing entries added.
    """
    argdict = rec_eval(expr, memo=memo, print_node_on_error=False)

    dataset_info = argdict['dataset_info']
    learning_algo = argdict['learning_algo']
    hp_space = argdict['hp_space']
    pipeline = argdict['pipeline']
    n_startup_trials = argdict['n_startup_trials']
    n_ok_trials = argdict['n_ok_trials']
    batchsize = argdict['batchsize']
    min_features = argdict['min_features']
    max_features = argdict['max_features']
    checkpoint_fname = argdict['checkpoint_fname']
    batched_lmap_speed_thresh = argdict['batched_lmap_speed_thresh']
    ctrl = argdict['ctrl']

    tid = ctrl.current_trial['tid']

    # -- checkpoint
    if isinstance(ctrl.trials, Trials):
        due = tid > 0 and tid % checkpoint_every == 0
        if due:
            save_hp(hp_space, ctrl.trials, n_startup_trials, checkpoint_fname)

    # -- retrieve trials from database
    if isinstance(ctrl.trials, MongoTrials):
        ctrl.trials.refresh()

    # -- check and signal stopping to optimizer
    current_ok_trials = count_ok_trials(ctrl.trials)
    if not current_ok_trials < n_ok_trials:
        raise SimpleHpStop(
            'number of ok trials reached - '
            'stopping process with %d ok trials out of '
            '%d trials.' % (current_ok_trials, tid),
            ctrl.trials)

    # -- feature extraction
    started_extraction = time()

    fn_imgs = getattr(dataset_info['data_obj'], dataset_info['fn_imgs'])
    limgs = lmap_ndarray(fn_imgs())

    batched = pyll_theano_batched_lmap(
        partial(callpipe1, pipeline),
        limgs,
        batchsize=batchsize,
        print_progress_every=10,
        speed_thresh=batched_lmap_speed_thresh,
        abort_on_rows_larger_than=max_features,
        x_dtype='uint8',
    )
    X = batched[:]

    feat_set = rec_eval(X, print_node_on_error=False)
    slm_time = time() - started_extraction

    # -- classification
    started_evaluation = time()

    # -- feat_set in 2-D
    original_shape = feat_set.shape
    feat_set.shape = feat_set.shape[0], -1

    assert feat_set.shape[1] >= min_features, 'min_features not satisfied'

    fn_eval = getattr(dataset_info['data_obj'], dataset_info['fn_eval'])
    r_dict = fn_eval(learning_algo, feat_set)
    eval_time = time() - started_evaluation

    r_dict['status'] = hyperopt.STATUS_OK
    r_dict['feat_shape'] = original_shape
    r_dict['slm_time'] = slm_time
    r_dict['eval_time'] = eval_time

    return r_dict
def test_vectorize_config0():
    """Vectorize a config of shared/conditional parameters and check consistency.

    ``p1`` is only reachable through the ``p3`` and ``p6`` choices, so it
    must be sampled for fewer than all indices, and wherever a choice
    selected it the configuration must carry the recorded ``p1`` value.
    """
    p0 = hp_uniform('p0', 0, 1)
    p1 = hp_loguniform('p1', 2, 3)
    p2 = hp_choice('p2', [-1, p0])
    p3 = hp_choice('p3', [-2, p1])
    p4 = 1
    p5 = [3, 4, p0]
    p6 = hp_choice('p6', [-3, p1])
    # -- locals() captures exactly p0..p6 as the config entries; no other
    #    local may be introduced above this line
    d = locals()
    d['p1'] = None  # -- don't sample p1 all the time, only if p3 says so
    config = as_apply(d)

    def _banner(title, payload):
        # Print a titled dump framed by a rule and a blank line.
        print('=' * 80)
        print(title)
        print(payload)
        print('\n' * 1)

    N = as_apply('N:TBA')
    helper = VectorizeHelper(config, scope.range(N), build=True)
    full_output = as_apply(
        [helper.v_expr, helper.idxs_by_label(), helper.vals_by_label()])

    if 1:
        _banner('VECTORIZED', full_output)

    fo2 = replace_repeat_stochastic(full_output)
    if 0:
        _banner('VECTORIZED STOCHASTIC', fo2)

    new_vc = recursive_set_rng_kwarg(fo2, as_apply(np.random.RandomState(1)))
    if 0:
        print('=' * 80)
        print('VECTORIZED STOCHASTIC WITH RNGS')
        print(new_vc)

    Nval = 10
    foo, idxs, vals = rec_eval(new_vc, memo={N: Nval})

    print('foo[0]', foo[0])
    print('foo[1]', foo[1])
    assert len(foo) == Nval
    if 0:  # XXX refresh these values to lock down sampler
        assert foo[0] == {
            'p0': 0.39676747423066994,
            'p1': None,
            'p2': 0.39676747423066994,
            'p3': 2.1281244479293568,
            'p4': 1,
            'p5': (3, 4, 0.39676747423066994)}
    assert foo[1] != foo[2]

    print(idxs)
    print(vals['p3'])
    print(vals['p6'])
    print(idxs['p1'])
    print(vals['p1'])
    assert len(vals['p3']) == Nval
    assert len(vals['p6']) == Nval
    assert len(idxs['p1']) < Nval

    p1_by_idx = dict(zip(idxs['p1'], vals['p1']))
    for slot, (picked3, picked6) in enumerate(zip(vals['p3'], vals['p6'])):
        if picked3 == picked6 == 0:
            assert slot not in idxs['p1']
        if picked3:
            assert foo[slot]['p3'] == p1_by_idx[slot]
        if picked6:
            print('p6', foo[slot]['p6'], p1_by_idx[slot])
            assert foo[slot]['p6'] == p1_by_idx[slot]
def test_pyll_nested_func():
    """A partial nested inside another must evaluate alike in both back ends."""
    inner = partial(int, 5.5)
    outer = partial(float, inner)
    converted = as_pyll(outer)
    assert evaluate(outer) == rec_eval(converted)
def test_vectorize_config0():
    """Vectorize a config of shared/conditional parameters and check consistency.

    ``p1`` is only reachable through the ``p3`` and ``p6`` choices, so it
    must be sampled for fewer than all indices, and wherever a choice
    selected it the configuration must carry the recorded ``p1`` value.
    """
    p0 = hp_uniform("p0", 0, 1)
    p1 = hp_loguniform("p1", 2, 3)
    p2 = hp_choice("p2", [-1, p0])
    p3 = hp_choice("p3", [-2, p1])
    p4 = 1
    p5 = [3, 4, p0]
    p6 = hp_choice("p6", [-3, p1])
    # -- locals() captures exactly p0..p6 as the config entries
    d = locals()
    d["p1"] = None  # -- don't sample p1 all the time, only if p3 says so
    config = as_apply(d)

    N = as_apply("N:TBA")
    expr = config
    expr_idxs = scope.range(N)
    vh = VectorizeHelper(expr, expr_idxs, build=True)
    vconfig = vh.v_expr

    full_output = as_apply([vconfig, vh.idxs_by_label(), vh.vals_by_label()])

    if 1:
        print("=" * 80)
        print("VECTORIZED")
        print(full_output)
        print("\n" * 1)

    fo2 = replace_repeat_stochastic(full_output)
    if 0:
        print("=" * 80)
        print("VECTORIZED STOCHASTIC")
        print(fo2)
        print("\n" * 1)

    new_vc = recursive_set_rng_kwarg(fo2, as_apply(np.random.RandomState(1)))
    if 0:
        print("=" * 80)
        print("VECTORIZED STOCHASTIC WITH RNGS")
        print(new_vc)

    Nval = 10
    foo, idxs, vals = rec_eval(new_vc, memo={N: Nval})

    print("foo[0]", foo[0])
    print("foo[1]", foo[1])
    assert len(foo) == Nval
    if 0:  # XXX refresh these values to lock down sampler
        assert foo[0] == {
            "p0": 0.39676747423066994,
            "p1": None,
            "p2": 0.39676747423066994,
            "p3": 2.1281244479293568,
            "p4": 1,
            "p5": (3, 4, 0.39676747423066994),
        }
    # Compare the configurations themselves: dict ``values()`` views never
    # compare equal to one another, so the previous
    # ``keys() != keys() or values() != values()`` check could not fail.
    assert foo[1] != foo[2]

    print(idxs)
    print(vals["p3"])
    print(vals["p6"])
    print(idxs["p1"])
    print(vals["p1"])
    assert len(vals["p3"]) == Nval
    assert len(vals["p6"]) == Nval
    assert len(idxs["p1"]) < Nval
    p1d = dict(zip(idxs["p1"], vals["p1"]))
    for ii, (p3v, p6v) in enumerate(zip(vals["p3"], vals["p6"])):
        if p3v == p6v == 0:
            assert ii not in idxs["p1"]
        if p3v:
            assert foo[ii]["p3"] == p1d[ii]
        if p6v:
            print("p6", foo[ii]["p6"], p1d[ii])
            assert foo[ii]["p6"] == p1d[ii]
def test_vectorize_config0():
    """Vectorize a config of shared/conditional parameters and check consistency.

    ``p1`` is only reachable through the ``p3`` and ``p6`` choices, so it
    must be sampled for fewer than all indices, and wherever a choice
    selected it the configuration must carry the recorded ``p1`` value.
    """
    p0 = hp_uniform('p0', 0, 1)
    p1 = hp_loguniform('p1', 2, 3)
    p2 = hp_choice('p2', [-1, p0])
    p3 = hp_choice('p3', [-2, p1])
    p4 = 1
    p5 = [3, 4, p0]
    p6 = hp_choice('p6', [-3, p1])
    # -- locals() captures exactly p0..p6 as the config entries
    d = locals()
    d['p1'] = None  # -- don't sample p1 all the time, only if p3 says so
    config = as_apply(d)

    N = as_apply('N:TBA')
    expr = config
    expr_idxs = scope.range(N)
    vh = VectorizeHelper(expr, expr_idxs, build=True)
    vconfig = vh.v_expr

    full_output = as_apply([vconfig, vh.idxs_by_label(), vh.vals_by_label()])

    # -- print() calls replace the Python-2-only print statements
    if 1:
        print('=' * 80)
        print('VECTORIZED')
        print(full_output)
        print('\n' * 1)

    fo2 = replace_repeat_stochastic(full_output)
    if 0:
        print('=' * 80)
        print('VECTORIZED STOCHASTIC')
        print(fo2)
        print('\n' * 1)

    new_vc = recursive_set_rng_kwarg(fo2, as_apply(np.random.RandomState(1)))
    if 0:
        print('=' * 80)
        print('VECTORIZED STOCHASTIC WITH RNGS')
        print(new_vc)

    Nval = 10
    foo, idxs, vals = rec_eval(new_vc, memo={N: Nval})

    print('foo[0]', foo[0])
    print('foo[1]', foo[1])
    assert len(foo) == Nval
    if 0:  # XXX refresh these values to lock down sampler
        assert foo[0] == {
            'p0': 0.39676747423066994,
            'p1': None,
            'p2': 0.39676747423066994,
            'p3': 2.1281244479293568,
            'p4': 1,
            'p5': (3, 4, 0.39676747423066994)}
    assert foo[1] != foo[2]

    print(idxs)
    print(vals['p3'])
    print(vals['p6'])
    print(idxs['p1'])
    print(vals['p1'])
    assert len(vals['p3']) == Nval
    assert len(vals['p6']) == Nval
    assert len(idxs['p1']) < Nval
    p1d = dict(zip(idxs['p1'], vals['p1']))
    for ii, (p3v, p6v) in enumerate(zip(vals['p3'], vals['p6'])):
        if p3v == p6v == 0:
            assert ii not in idxs['p1']
        if p3v:
            assert foo[ii]['p3'] == p1d[ii]
        if p6v:
            print('p6', foo[ii]['p6'], p1d[ii])
            assert foo[ii]['p6'] == p1d[ii]
def test_pyll_tuple():
    """A plain tuple literal must evaluate alike in both back ends."""
    original = as_partialplus((6, 9, 4))
    converted = as_pyll(original)
    assert evaluate(original) == rec_eval(converted)
def work(self, **kwargs):
    """Optimize a q-uniform bandit with TPE and print per-round likelihoods.

    After ``fmin`` has run ``self.LEN`` evaluations, the 'x' trials are
    split round by round with ``ap_filter_trials``; for each round the
    log-likelihood of every candidate value under the "below" and "above"
    posteriors is evaluated and printed, and optionally plotted.

    :param kwargs: copied onto the instance (``self.__dict__.update``); the
        method reads ``target``, ``LEN``, ``show_vars`` and ``show_steps``.
    """
    self.__dict__.update(kwargs)
    bandit = opt_q_uniform(self.target)
    prior_weight = 2.5
    gamma = 0.20
    algo = partial(tpe.suggest,
                   prior_weight=prior_weight,
                   n_startup_jobs=2,
                   n_EI_candidates=128,
                   gamma=gamma)
    trials = Trials()
    fmin(passthrough,
         space=bandit.expr,
         algo=algo,
         trials=trials,
         max_evals=self.LEN)
    if self.show_vars:
        import hyperopt.plotting
        hyperopt.plotting.main_plot_vars(trials, bandit, do_show=1)

    idxs, vals = miscs_to_idxs_vals(trials.miscs)
    idxs = idxs['x']
    vals = vals['x']

    losses = trials.losses()

    from hyperopt.tpe import ap_filter_trials
    from hyperopt.tpe import adaptive_parzen_samplers

    qu = scope.quniform(1.01, 10, 1)
    fn = adaptive_parzen_samplers['quniform']
    fn_kwargs = dict(size=(4, ), rng=np.random)
    # -- placeholders bound to the below/above observations in each round
    s_below = pyll.Literal()
    s_above = pyll.Literal()
    b_args = [s_below, prior_weight] + qu.pos_args
    b_post = fn(*b_args, **fn_kwargs)
    a_args = [s_above, prior_weight] + qu.pos_args
    a_post = fn(*a_args, **fn_kwargs)

    fn_lpdf = getattr(scope, a_post.name + '_lpdf')
    # -- print() calls replace the Python-2-only print statements
    print(fn_lpdf)
    # calculate the llik of b_post under both distributions
    a_kwargs = {n: a for n, a in a_post.named_args
                if n not in ('rng', 'size')}
    b_kwargs = {n: a for n, a in b_post.named_args
                if n not in ('rng', 'size')}
    below_llik = fn_lpdf(*([b_post] + b_post.pos_args), **b_kwargs)
    above_llik = fn_lpdf(*([b_post] + a_post.pos_args), **a_kwargs)
    new_node = scope.broadcast_best(b_post, below_llik, above_llik)

    print('=' * 80)

    do_show = self.show_steps
    # -- candidate values scored in every round
    all_vals = [2, 3, 4, 5, 6, 7, 8, 9, 10]

    for ii in range(2, 9):
        # -- vals[ii] is read below, so ii must be a valid index; the former
        #    ``ii > len(idxs)`` guard let ii == len(idxs) through
        if ii >= len(idxs):
            break
        print('-' * 80)
        print('ROUND', ii)
        print('-' * 80)
        below, above = ap_filter_trials(idxs[:ii], vals[:ii], idxs[:ii],
                                        losses[:ii], gamma)
        below = below.astype('int')
        above = above.astype('int')
        print('BB0', below)
        print('BB1', above)
        memo = {b_post: all_vals, s_below: below, s_above: above}
        bl, al, nv = pyll.rec_eval([below_llik, above_llik, new_node],
                                   memo=memo)
        print('BB2', dict(zip(all_vals, bl - al)))
        print('BB3', dict(zip(all_vals, bl)))
        print('BB4', dict(zip(all_vals, al)))
        print('ORIG PICKED', vals[ii])
        print('PROPER OPT PICKS:', nv)

        if do_show:
            plt.subplot(8, 1, ii)
            plt.scatter(all_vals, bl, c='g')
            plt.scatter(all_vals, al, c='r')
    if do_show:
        plt.show()
def _format_split_statistics(title, labels, split_statistics):
    """Render one section ('Val' or 'Test') of the results-summary log text.

    :param title: Section title written before the per-label lines.
    :param labels: Keys of ``split_statistics`` to report, in order.
    :param split_statistics: Mapping label -> {'Spearman', 'Kendall',
        'Pearson'} correlation values.
    :return: ``'<title>: \\n'`` followed by one ``' <label> -> ... \\n'``
        line per label.
    """
    metric_names = ['Spearman', 'Kendall', 'Pearson']
    lines = [
        ' {} -> {} \n'.format(
            label,
            ''.join('{}={:.3f} '.format(name, split_statistics[label][name])
                    for name in metric_names))
        for label in labels
    ]
    return '{}: \n'.format(title) + ''.join(lines)


def hyper_optimization(year, mode, human_metric, best_params):
    """
    Execute a hyper optimization algorithm in order to obtain the best
    parameters for a specific model when we are testing on 'year' with
    mode='mode'.

    Side effects visible in this function: it rebinds the module globals
    ``TRIAL_NO`` and ``LOGGER``, clears the Keras session, pickles the
    ``Trials`` object under ``TRIALS_DIR`` after every ``fmin`` call (so an
    interrupted search resumes from the saved file), and stores the
    lowest-loss configuration in ``best_params[year][human_metric][mode]``.

    :param year: The year we are testing.
    :param mode: Depending on your choice:
        ['Single Task', 'Multi Task-1', 'Multi Task-5'].
    :param human_metric: The metric for which the model is trained.
        It is needed only on 'Single Task' mode.
    :param best_params: A dictionary where will be saved all the best
        parameters obtained by hyper-optimization.
    """
    global TRIAL_NO, LOGGER

    with open(CONFIG_PATH) as config_file:
        search_space = json.load(
            config_file)['hyper_optimization']['search_space']

    # Clears the session for each run of the algorithm
    K.clear_session()

    TRIAL_NO = 0

    log_path = os.path.join(
        LOGS_DIR,
        'hyper_opt_log_{}_{}_{}.txt'.format(human_metric, year, mode))
    logger_name = 'LOGGER_{}_{}_{}'.format(year, human_metric, mode)
    setup_logger(logger_name=logger_name, log_path=log_path,
                 level=logging.INFO)
    LOGGER = logging.getLogger(logger_name)

    train_x, train_y, val_x, val_y, val_ordered_ids = load_train_data(year)
    test_x, test_y, test_ordered_ids, test_empty_ids = load_test_data(year)

    if mode == 'Single Task':  # 1 Dense -> 1 predictions
        # human_metric looks like 'Q<digit>'; its second character selects
        # the target column.
        human_metric_index = int(human_metric[1]) - 1
        train_y = train_y[:, human_metric_index]
        val_y = val_y[:, human_metric_index]
        test_y = test_y[:, human_metric_index]

    train_samples = {'x': train_x, 'y': train_y}
    test_samples = {
        'x': test_x,
        'y': test_y,
        'ordered_ids': test_ordered_ids,
        'empty_ids': test_empty_ids
    }
    val_samples = {'x': val_x, 'y': val_y, 'ordered_ids': val_ordered_ids}

    search_space = {key: hp.choice(key, value)
                    for key, value in search_space.items()}

    # Evaluate the last positional argument of every hp.choice node to get
    # one concrete configuration for compiling the initial network.
    # NOTE(review): presumably this is the last option of each choice list --
    # confirm against hyperopt's hp.choice graph layout.
    space_item = pyll.rec_eval(
        {key: value.pos_args[-1] for key, value in search_space.items()})

    network = compile_bigrus_attention(
        shape=(300, 300),
        n_hidden_layers=space_item['n_hidden_layers'],
        hidden_units_size=space_item['hidden_units_size'],
        dropout_rate=space_item['dropout_rate'],
        word_dropout_rate=space_item['word_dropout_rate'],
        lr=space_item['learning_rate'],
        mode=mode)

    trials_path = os.path.join(
        TRIALS_DIR, '{}_{}_{}'.format(year, human_metric, mode))

    # Start hyper-opt trials: one new evaluation per iteration, persisting
    # the Trials object after each so the search can be resumed.
    while True:
        try:
            # NOTE: pickle must only be used on trusted files; this one is
            # the checkpoint written by this very function below.
            with open(trials_path, 'rb') as trials_file:
                trials = pickle.load(trials_file)
        except FileNotFoundError:
            trials = Trials()
            max_evaluations = 1
        else:
            max_evaluations = len(trials.trials) + 1
            print("Found it")

        TRIAL_NO = max_evaluations

        if max_evaluations > HYPER_OPT_CONFIG['trials']:
            break

        fmin(fn=lambda space_item: optimization_function(
                 network=network,
                 train_samples=train_samples,
                 test_samples=test_samples,
                 val_samples=val_samples,
                 current_space=space_item,
                 year=year,
                 mode=mode,
                 metric=human_metric),
             space=search_space,
             algo=tpe.suggest,
             max_evals=max_evaluations,
             trials=trials)

        with open(trials_path, 'wb') as f:
            pickle.dump(trials, f)

    LOGGER.info(
        '\n\n--------------------- Results Summary Best to Worst ------------------'
    )

    if mode == 'Multi Task-1' or mode == 'Multi Task-5':
        stat_labels = ['Q{}'.format(i) for i in range(1, 6)]
    elif mode == 'Single Task':
        stat_labels = [human_metric]
    else:
        stat_labels = []

    ranked_results = sorted(trials.results, key=lambda trial: trial['loss'])
    for rank, t in enumerate(ranked_results):
        conf = t['results']['configuration']
        average_statistics = t['results']['statistics']

        # We want to write only the best parameters each time
        if rank == 0:
            best_params[year][human_metric][mode] = {
                "HL": conf['n_hidden_layers'],
                "HU": conf['hidden_units_size'],
                "BS": conf['batch_size'],
                "D": conf['dropout_rate'],
                "WD": conf['word_dropout_rate'],
                "LR": conf['learning_rate']
            }

        log_msg = MSG_TEMPLATE.format(
            t['trial_no'], HYPER_OPT_CONFIG['trials'],
            str(conf['n_hidden_layers']), str(conf['hidden_units_size']),
            conf['batch_size'], conf['dropout_rate'],
            conf['word_dropout_rate'], conf['attention_mechanism'],
            conf['learning_rate'], year, human_metric, mode) + '\n'

        if stat_labels:
            log_msg += _format_split_statistics(
                'Val', stat_labels, average_statistics['validation'])
            log_msg += _format_split_statistics(
                'Test', stat_labels, average_statistics['test'])

        LOGGER.info(log_msg)

    trials_training_time = sum(
        trial['results']['time'] for trial in trials.results)
    LOGGER.info('\n Hyper Optimization search took {} days {}\n\n'.format(
        int(trials_training_time / (24 * 60 * 60)),
        time.strftime("%H:%M:%S", time.gmtime(trials_training_time))))
def work(self, **kwargs):
    """Run TPE on a quantized-uniform bandit and print per-round diagnostics.

    Keyword arguments are copied onto the instance (``self.__dict__``) before
    anything else; the method then reads ``self.target``, ``self.LEN``,
    ``self.show_vars`` and ``self.show_steps``.

    ``fmin`` is run with ``max_evals=self.LEN``.  Afterwards, for rounds
    ``ii`` in 2..8 (stopping early when fewer trials exist), the first ``ii``
    trials are split with ``ap_filter_trials`` and the log-likelihood of each
    candidate value under the "below" and "above" adaptive-Parzen posteriors
    is evaluated, printed, and optionally scatter-plotted.
    """
    self.__dict__.update(kwargs)
    bandit = opt_q_uniform(self.target)
    prior_weight = 2.5
    gamma = 0.20
    algo = partial(tpe.suggest,
                   prior_weight=prior_weight,
                   n_startup_jobs=2,
                   n_EI_candidates=128,
                   gamma=gamma)

    trials = Trials()
    fmin(passthrough,
         space=bandit.expr,
         algo=algo,
         trials=trials,
         max_evals=self.LEN)
    if self.show_vars:
        import hyperopt.plotting
        hyperopt.plotting.main_plot_vars(trials, bandit, do_show=1)

    idxs, vals = miscs_to_idxs_vals(trials.miscs)
    idxs = idxs['x']
    vals = vals['x']

    losses = trials.losses()

    from hyperopt.tpe import ap_filter_trials
    from hyperopt.tpe import adaptive_parzen_samplers

    # Build symbolic "below"/"above" posteriors whose observations are
    # placeholders (Literals) that get filled in through `memo` each round.
    qu = scope.quniform(1.01, 10, 1)
    fn = adaptive_parzen_samplers['quniform']
    fn_kwargs = dict(size=(4,), rng=np.random)
    s_below = pyll.Literal()
    s_above = pyll.Literal()
    b_args = [s_below, prior_weight] + qu.pos_args
    b_post = fn(*b_args, **fn_kwargs)
    a_args = [s_above, prior_weight] + qu.pos_args
    a_post = fn(*a_args, **fn_kwargs)

    fn_lpdf = getattr(scope, a_post.name + '_lpdf')
    # NOTE: every print below passes exactly one argument, so it produces
    # the same output whether `print` is a statement (Python 2) or a
    # function (Python 3).
    print(fn_lpdf)

    # calculate the llik of b_post under both distributions
    # ('rng' and 'size' only make sense for the sampler, not for the lpdf)
    a_kwargs = {n: a for n, a in a_post.named_args
                if n not in ('rng', 'size')}
    b_kwargs = {n: a for n, a in b_post.named_args
                if n not in ('rng', 'size')}
    below_llik = fn_lpdf(*([b_post] + b_post.pos_args), **b_kwargs)
    above_llik = fn_lpdf(*([b_post] + a_post.pos_args), **a_kwargs)
    new_node = scope.broadcast_best(b_post, below_llik, above_llik)

    print('=' * 80)

    do_show = self.show_steps
    # Candidate values at which both likelihoods are evaluated each round.
    all_vals = [2, 3, 4, 5, 6, 7, 8, 9, 10]

    for ii in range(2, 9):
        if ii > len(idxs):
            break
        print('-' * 80)
        print('ROUND %s' % (ii,))
        print('-' * 80)
        below, above = ap_filter_trials(idxs[:ii], vals[:ii], idxs[:ii],
                                        losses[:ii], gamma)
        below = below.astype('int')
        above = above.astype('int')
        print('BB0 %s' % (below,))
        print('BB1 %s' % (above,))
        # Substituting b_post itself forces the lpdfs to be evaluated at
        # `all_vals` instead of at freshly drawn samples.
        memo = {b_post: all_vals, s_below: below, s_above: above}
        bl, al, nv = pyll.rec_eval([below_llik, above_llik, new_node],
                                   memo=memo)
        print('BB2 %s' % (dict(zip(all_vals, bl - al)),))
        print('BB3 %s' % (dict(zip(all_vals, bl)),))
        print('BB4 %s' % (dict(zip(all_vals, al)),))
        # vals[ii] is the value chosen right after the first `ii` trials; it
        # does not exist when ii == len(vals), which the guard above allows.
        if ii < len(vals):
            print('ORIG PICKED %s' % (vals[ii],))
        print('PROPER OPT PICKS: %s' % (nv,))

        if do_show:
            plt.subplot(8, 1, ii)
            plt.scatter(all_vals, bl, c='g')
            plt.scatter(all_vals, al, c='r')
    if do_show:
        plt.show()
def test_pyll_list():
    """A partialplus list must evaluate to the same list after as_pyll."""
    partial_expr = as_partialplus([5, 3, 9])
    pyll_expr = as_pyll(partial_expr)
    expected = evaluate(partial_expr)
    # rec_eval always uses tuple, so convert before comparing with a list
    actual = list(rec_eval(pyll_expr))
    assert expected == actual
def pyll_theano_batched_lmap(pipeline, seq, batchsize,
                             _debug_call_counts=None,
                             print_progress_every=float('inf'),
                             abort_on_rows_larger_than=None,
                             speed_thresh=None,
                             x_dtype='float32',
                             ):
    """
    Return a skdata.larray.lmap object whose function is defined by a
    theano expression.

    The theano expression is built and compiled specifically for the
    dimensions of the given `seq` and for `batchsize`.

    :param pipeline: callable invoked once as ``pipeline((s_X, in_shp))``;
        its result is passed to ``pyll.rec_eval`` and must evaluate to a
        pair ``(theano output batch, output shape)``.
    :param seq: sequence to map over; only ``seq.shape[1:]`` is read here
        to size the input buffer.
    :param batchsize: fixed number of rows used for all internal
        computation; the output shape's first entry must equal it.
    :param _debug_call_counts: optional mapping; the entries ``'fn_1'`` and
        ``'f_map'`` are incremented on each call of the respective function.
    :param print_progress_every: minimum number of seconds between progress
        log messages.
    :param abort_on_rows_larger_than: if truthy, raise ``ValueError`` when
        the number of elements per output row exceeds it.
    :param speed_thresh: optional dict with keys ``'elements'`` and
        ``'seconds'``; once at least ``'elements'`` rows were processed,
        ``EvalTimeout`` is raised if the observed throughput is below
        ``elements / seconds``.
    :param x_dtype: dtype of the shared input buffer.
    :raises ValueError: on too-large rows, or on non-finite outputs in the
        chunked path of the batch mapper.
    """
    in_shp = (batchsize,) + seq.shape[1:]
    batch = np.zeros(in_shp, dtype=x_dtype)
    s_ibatch = theano.shared(batch)
    s_xi = theano.tensor.as_tensor_variable(s_ibatch).type()
    s_N = s_xi.shape[0]
    s_X = theano.tensor.set_subtensor(s_ibatch[:s_N], s_xi)
    thing = pipeline((s_X, in_shp))
    s_obatch, oshp = pyll.rec_eval(thing)
    assert oshp[0] == batchsize
    logger.info('batched_lmap oshp %s' % str(oshp))
    if abort_on_rows_larger_than:
        rowlen = np.prod(oshp[1:])
        if rowlen > abort_on_rows_larger_than:
            raise ValueError('rowlen %i exceeds limit %i' % (
                rowlen, abort_on_rows_larger_than))

    # Compile a function that takes a variable number of elements in,
    # returns the same number of processed elements out,
    # but does all internal computations using a fixed number of elements,
    # because convolutions are fastest when they're hard-coded to a certain
    # size.
    logger.debug('pyll_theano_batched_lmap compiling fn')
    _fn = theano.function(
        [theano.Param(s_xi, strict=True)],
        s_obatch[:s_N],
        updates={
            s_ibatch: s_X,  # this allows the inc_subtensor to be in-place
        })
    logger.debug('pyll_theano_batched_lmap compiling fn -> done')

    # Running totals since the last successful speed check.
    sums = {'elems': 0, 'times': 0.0}

    def count_call(name):
        # `is not None` (rather than truthiness) so that an initially empty
        # mapping passed by the caller is still updated.
        if _debug_call_counts is not None:
            _debug_call_counts[name] = _debug_call_counts.get(name, 0) + 1

    if speed_thresh is None:
        time_fn = _fn
    else:
        def time_fn(X):
            t0 = time.time()
            if str(X.dtype) != x_dtype:
                # single-argument form prints identically on Python 2 and 3
                print('time_fn dtype problem %s %s' % (X.dtype, x_dtype))
            rval = _fn(X)
            dt = time.time() - t0
            sums['elems'] += len(X)
            sums['times'] += dt
            return rval

        def raise_if_slow():
            if sums['elems'] < speed_thresh['elements']:
                return
            # float() keeps this a true division on Python 2 when both
            # thresholds are ints.
            required_ratio = (float(speed_thresh['elements'])
                              / speed_thresh['seconds'])
            if sums['times'] > 0:
                observed_ratio = sums['elems'] / sums['times']
            else:
                # No measurable time elapsed: treat as infinitely fast.
                observed_ratio = float('inf')
            if observed_ratio < required_ratio:
                raise EvalTimeout(
                    'batched_lmap failed to compute %i elements in %f secs'
                    % (speed_thresh['elements'], speed_thresh['seconds']))
            sums['elems'] = 0
            sums['times'] = 0.0

    def fn_1(x):
        count_call('fn_1')
        # Add a leading batch axis of length 1; the indexing requires `x`
        # to have exactly three dimensions.
        return time_fn(x[None, :, :, :])[0]

    attrs = {
        'shape': oshp[1:],
        'ndim': len(oshp) - 1,
        'dtype': s_obatch.dtype,
    }

    def rval_getattr(attr, objs):
        # -- objs don't matter to the structure of the return value
        try:
            return attrs[attr]
        except KeyError:
            raise AttributeError(attr)

    fn_1.rval_getattr = rval_getattr

    # One-element list so the nested function can rebind the timestamp.
    last_print_time = [time.time()]

    def check_for_print(offset, X):
        curtime = time.time()
        if (curtime - last_print_time[0]) > print_progress_every:
            logger.info('pyll_theano_batched_lmap.f_map %i %i' % (
                offset, len(X)))
            last_print_time[0] = curtime

        if speed_thresh is not None:
            raise_if_slow()

    def f_map(X):
        count_call('f_map')

        if len(X) == batchsize:
            check_for_print(offset=0, X=X)
            return time_fn(X)

        rval = np.empty((len(X),) + oshp[1:], dtype=s_obatch.dtype)
        offset = 0
        while offset < len(X):
            check_for_print(offset, X)
            xi = X[offset: offset + batchsize]
            fn_i = time_fn(xi)
            if not np.all(np.isfinite(fn_i)):
                raise ValueError('non-finite features')
            rval[offset:offset + len(xi)] = fn_i
            offset += len(xi)
        return rval

    return larray.lmap(fn_1, seq, f_map=f_map)
def test_pyll_func():
    """A partial of a builtin must evaluate identically after as_pyll."""
    # N.B. Only uses stuff that's already in the SymbolTable.
    partial_expr = partial(float, 5)
    pyll_expr = as_pyll(partial_expr)
    expected = evaluate(partial_expr)
    actual = rec_eval(pyll_expr)
    assert expected == actual
def suggest(self, new_ids, domain, trials, seed):
    """Suggest trials for `new_ids`, skipping parameter sets already run.

    For every id, points are sampled from `domain` until one is found whose
    (zero-normalised, stringified) parameters are not in
    ``self.executed_params``; that trial is appended to the result.

    Returns the list of new trial documents, or an empty list as soon as
    either all ``self.num_combinations`` have been executed or the number
    of skipped duplicates reaches ``100 * self.num_combinations``.
    """
    rng = np.random.RandomState(seed)
    suggested = []
    for new_id in new_ids:
        while self._cnt <= self.num_combinations:
            # -- sample new specs, idxs, vals
            memo = {domain.s_new_ids: [new_id], domain.s_rng: rng}
            idxs, vals = pyll.rec_eval(domain.s_idxs_vals, memo=memo)
            new_result = domain.new_result()
            new_misc = {
                'tid': new_id,
                'cmd': domain.cmd,
                'workdir': domain.workdir,
            }
            miscs_update_idxs_vals([new_misc], idxs, vals)
            trial_docs = trials.new_trial_docs(
                [new_id], [None], [new_result], [new_misc])

            # Except the `while`, until here, code is copied from
            # rand.suggest; new code from here
            history = self._get_historical_params(trials)
            self.executed_params = self.executed_params.union(history)

            # avoid counting floating zero twice (as +0.0 and -0.0)
            candidate = hyperopt_grid._convert_neg_zeros_to_zeros(
                dict(new_misc['vals']))
            # represent the params as a hashed string
            candidate_key = dict_to_sorted_str(candidate)

            if candidate_key in self.executed_params:
                # params were seen before: skip this draw and count it
                self._cnt_skip += 1
            else:
                # first time these params are seen: emit the trial and
                # record it as executed, in order to avoid duplication
                suggested.extend(trial_docs)
                self._cnt += 1
                self.executed_params = self.executed_params.union(
                    [candidate_key])
                print(self._cnt, candidate)
                break

            # Stopping condition: returning an empty list breaks the
            # hyperopt loop
            if len(self.executed_params) >= self.num_combinations:
                return []

            # "Emergency" stopping condition, breaking the hyperopt loop
            # when it runs for too long without submitted experiments
            if self._cnt_skip >= 100 * self.num_combinations:
                warnings.warn('Warning: Exited due to too many skips.'
                              ' This can happen if most of the param combinationa have '
                              'been encountered, and drawing a new '
                              'unseen combination, involves a very low probablity.')
                return []
    return suggested
def get_args(params):
    """Evaluate `expr` with each hyperopt_param node fixed from `params`.

    `params` is indexed by the label object of every 'hyperopt_param' node
    found while traversing `expr`; the evaluated expression is returned.
    """
    memo = {}
    for node in pyll.dfs(expr):
        if node.name != 'hyperopt_param':
            continue
        label = node.arg['label'].obj
        memo[node] = params[label]
    return pyll.rec_eval(expr, memo=memo)
def pyll_theano_batched_lmap(
        pipeline,
        seq,
        batchsize,
        _debug_call_counts=None,
        print_progress_every=float('inf'),
        abort_on_rows_larger_than=None,
        speed_thresh=None,
        x_dtype='float32',
):
    """
    Return a skdata.larray.lmap object whose function is defined by a
    theano expression.

    The theano expression is built and compiled specifically for the
    dimensions of the given `seq` and for `batchsize`.

    :param pipeline: callable invoked once as ``pipeline((s_X, in_shp))``;
        its result is passed to ``pyll.rec_eval`` and must evaluate to a
        pair ``(theano output batch, output shape)``.
    :param seq: sequence to map over; only ``seq.shape[1:]`` is read here
        to size the input buffer.
    :param batchsize: fixed number of rows used for all internal
        computation; the output shape's first entry must equal it.
    :param _debug_call_counts: optional mapping; the entries ``'fn_1'`` and
        ``'f_map'`` are incremented on each call of the respective function.
    :param print_progress_every: minimum number of seconds between progress
        log messages.
    :param abort_on_rows_larger_than: if truthy, raise ``ValueError`` when
        the number of elements per output row exceeds it.
    :param speed_thresh: optional dict with keys ``'elements'`` and
        ``'seconds'``; once at least ``'elements'`` rows were processed,
        ``EvalTimeout`` is raised if the observed throughput is below
        ``elements / seconds``.
    :param x_dtype: dtype of the shared input buffer.
    :raises ValueError: on too-large rows, or on non-finite outputs in the
        chunked path of the batch mapper.
    """
    in_shp = (batchsize, ) + seq.shape[1:]
    batch = np.zeros(in_shp, dtype=x_dtype)
    s_ibatch = theano.shared(batch)
    s_xi = theano.tensor.as_tensor_variable(s_ibatch).type()
    s_N = s_xi.shape[0]
    s_X = theano.tensor.set_subtensor(s_ibatch[:s_N], s_xi)
    thing = pipeline((s_X, in_shp))
    s_obatch, oshp = pyll.rec_eval(thing)
    assert oshp[0] == batchsize
    logger.info('batched_lmap oshp %s' % str(oshp))
    if abort_on_rows_larger_than:
        rowlen = np.prod(oshp[1:])
        if rowlen > abort_on_rows_larger_than:
            raise ValueError('rowlen %i exceeds limit %i' %
                             (rowlen, abort_on_rows_larger_than))

    # Compile a function that takes a variable number of elements in,
    # returns the same number of processed elements out,
    # but does all internal computations using a fixed number of elements,
    # because convolutions are fastest when they're hard-coded to a certain
    # size.
    logger.debug('pyll_theano_batched_lmap compiling fn')
    _fn = theano.function(
        [theano.Param(s_xi, strict=True)],
        s_obatch[:s_N],
        updates={
            s_ibatch: s_X,  # this allows the inc_subtensor to be in-place
        })
    logger.debug('pyll_theano_batched_lmap compiling fn -> done')

    # Running totals since the last successful speed check.
    sums = {'elems': 0, 'times': 0.0}

    def count_call(name):
        # `is not None` (rather than truthiness) so that an initially empty
        # mapping passed by the caller is still updated.
        if _debug_call_counts is not None:
            _debug_call_counts[name] = _debug_call_counts.get(name, 0) + 1

    if speed_thresh is None:
        time_fn = _fn
    else:

        def time_fn(X):
            t0 = time.time()
            if str(X.dtype) != x_dtype:
                # single-argument form prints identically on Python 2 and 3
                print('time_fn dtype problem %s %s' % (X.dtype, x_dtype))
            rval = _fn(X)
            dt = time.time() - t0
            sums['elems'] += len(X)
            sums['times'] += dt
            return rval

        def raise_if_slow():
            if sums['elems'] < speed_thresh['elements']:
                return
            # float() keeps this a true division on Python 2 when both
            # thresholds are ints.
            required_ratio = (float(speed_thresh['elements']) /
                              speed_thresh['seconds'])
            if sums['times'] > 0:
                observed_ratio = sums['elems'] / sums['times']
            else:
                # No measurable time elapsed: treat as infinitely fast.
                observed_ratio = float('inf')
            if observed_ratio < required_ratio:
                raise EvalTimeout(
                    'batched_lmap failed to compute %i elements in %f secs'
                    % (speed_thresh['elements'], speed_thresh['seconds']))
            sums['elems'] = 0
            sums['times'] = 0.0

    def fn_1(x):
        count_call('fn_1')
        # Add a leading batch axis of length 1; the indexing requires `x`
        # to have exactly three dimensions.
        return time_fn(x[None, :, :, :])[0]

    attrs = {
        'shape': oshp[1:],
        'ndim': len(oshp) - 1,
        'dtype': s_obatch.dtype,
    }

    def rval_getattr(attr, objs):
        # -- objs don't matter to the structure of the return value
        try:
            return attrs[attr]
        except KeyError:
            raise AttributeError(attr)

    fn_1.rval_getattr = rval_getattr

    # One-element list so the nested function can rebind the timestamp.
    last_print_time = [time.time()]

    def check_for_print(offset, X):
        curtime = time.time()
        if (curtime - last_print_time[0]) > print_progress_every:
            logger.info('pyll_theano_batched_lmap.f_map %i %i' %
                        (offset, len(X)))
            last_print_time[0] = curtime

        if speed_thresh is not None:
            raise_if_slow()

    def f_map(X):
        count_call('f_map')

        if len(X) == batchsize:
            check_for_print(offset=0, X=X)
            return time_fn(X)

        rval = np.empty((len(X), ) + oshp[1:], dtype=s_obatch.dtype)
        offset = 0
        while offset < len(X):
            check_for_print(offset, X)
            xi = X[offset:offset + batchsize]
            fn_i = time_fn(xi)
            if not np.all(np.isfinite(fn_i)):
                raise ValueError('non-finite features')
            rval[offset:offset + len(xi)] = fn_i
            offset += len(xi)
        return rval

    return larray.lmap(fn_1, seq, f_map=f_map)