def run(self, T_or_generator, inner_objective_feed_dicts=None, outer_objective_feed_dicts=None,
        initializer_feed_dict=None, global_step=None, session=None, online=False, callback=None):
    """
    Runs the forward pass, saving the result of every step into the history, and then the reverse pass
    over the saved entries.

    :param T_or_generator: passed through `utils.as_tuple_or_list`; the first element drives the forward
                            loop and the last element drives the reverse loop (both through
                            `utils.solve_int_or_generator`)
    :param inner_objective_feed_dicts: given to `utils.maybe_call` together with the step index to obtain
                                        the feed dictionary of each forward and each reverse step
    :param outer_objective_feed_dicts: given to `utils.maybe_call` together with the evaluated
                                        `global_step` to obtain the feed dictionary of the reverse
                                        initializer
    :param initializer_feed_dict: given to `utils.maybe_call` together with the evaluated `global_step`
                                   to obtain the feed dictionary of `self.initialization`; it is also
                                   merged into the feed dictionary of the reverse initializer
    :param global_step: evaluated through `utils.maybe_eval(global_step, session)`
    :param session: session used for every `run` call; when falsy `tf.get_default_session()` is used
    :param online: when True `self.initialization` is not run and no initial entry is saved
    :param callback: a single callback or a pair (first for the forward pass, second for the reverse
                     pass); each one is invoked through `utils.maybe_call(cb, t, feed_dict, session)`
    """
    # callback may be a pair: first for the forward pass, second for the reverse pass
    callback = utils.as_tuple_or_list(callback)
    # same thing for T
    T_or_generator = utils.as_tuple_or_list(T_or_generator)

    ss = session or tf.get_default_session()

    del self._history[:]
    if not online:
        _fd = utils.maybe_call(initializer_feed_dict, utils.maybe_eval(global_step, ss))
        self._save_history(ss.run(self.initialization, feed_dict=_fd))

    # else:  # not totally clear if this should be added
    #     self._save_history(ss.run(list(self.state)))

    # Index of the last executed forward step. It is tracked explicitly because the loop variable would
    # be undefined if the forward iterable is empty (and `nonlocal` is not available in Python 2.7).
    last_step = 0
    for t in utils.solve_int_or_generator(T_or_generator[0]):
        _fd = utils.maybe_call(inner_objective_feed_dicts, t)
        self._save_history(ss.run(self.iteration, feed_dict=_fd))
        utils.maybe_call(callback[0], t, _fd, ss)
        last_step = t

    # initialization of support variables (supports stochastic evaluation of outer objective via
    # global_step -> variable)
    # TODO (maybe tf bug or oddity): original author's note -- if some variable's initializer depends on a
    # placeholder, then the initializer of alpha SEEMS to depend on that placeholder too, as if the primary
    # variable should be reinitialized as well, although the primary variable is NOT actually
    # reinitialized. To be investigated further.
    reverse_init_fd = utils.maybe_call(outer_objective_feed_dicts, utils.maybe_eval(global_step, ss))
    # the initializer feed dictionary is added as well because of the quirk described above
    maybe_init_fd = utils.maybe_call(initializer_feed_dict, utils.maybe_eval(global_step, ss))
    reverse_init_fd = utils.merge_dicts(reverse_init_fd, maybe_init_fd)
    ss.run(self._reverse_initializer, feed_dict=reverse_init_fd)

    for pt, state_feed_dict in self._state_feed_dict_generator(reversed(self._history[:-1]),
                                                               T_or_generator[-1]):
        # The pt-th entry of the reversed history (last entry excluded) is the one that was saved right
        # before forward step `last_step - pt` ran, so that is the step whose feed dictionary must be
        # replayed. With an integer T the index therefore starts at T - 1 and ends at 0.
        # (The previous expression `last_step - pt - 1` started at T - 2 and ended at -1.)
        t = last_step - pt
        _fd = utils.merge_dicts(state_feed_dict, utils.maybe_call(inner_objective_feed_dicts, t))
        ss.run(self._alpha_iter, _fd)
        if len(callback) == 2:
            utils.maybe_call(callback[1], t, _fd, ss)
def run(self, T_or_generator, inner_objective_feed_dicts=None, outer_objective_feed_dicts=None,
        initializer_feed_dict=None, global_step=None, session=None, online=False, forward_callback=None):
    """
    Runs the forward pass, appending the result of every step to `self._history`, and then the reverse
    pass over the stored entries.

    :param T_or_generator: an integer (checked with `utils.isinteger`), in which case the forward loop
                            iterates over `range(T_or_generator)`, or an iterable of step indices
    :param inner_objective_feed_dicts: given to `utils.maybe_call` together with the step index to obtain
                                        the feed dictionary of each forward step; in the reverse pass it
                                        is used only when truthy, otherwise an empty dictionary is merged
    :param outer_objective_feed_dicts: given to `utils.maybe_call` together with the evaluated
                                        `global_step` to obtain the feed dictionary of the reverse
                                        initializer
    :param initializer_feed_dict: given to `utils.maybe_call` together with the evaluated `global_step`;
                                   used for `self.initialization` and, when the result is not None, also
                                   merged into the feed dictionary of the reverse initializer
    :param global_step: evaluated through `utils.maybe_eval(global_step, session)`
    :param session: session used for every `run` call; when falsy `tf.get_default_session()` is used
    :param online: when True the current `self.state` is stored as first history entry instead of
                   running `self.initialization`
    :param forward_callback: optional callable invoked as `forward_callback(t, feed_dict, session)`
                             after each forward step
    """
    sess = session or tf.get_default_session()

    self._history.clear()
    if online:
        first_entry = sess.run(self.state)
    else:
        init_feeds = utils.maybe_call(initializer_feed_dict, utils.maybe_eval(global_step, sess))
        first_entry = sess.run(self.initialization, feed_dict=init_feeds)
    self._history.append(first_entry)

    # optionally may track inner objective (to check for divergence)
    if utils.isinteger(T_or_generator):
        forward_steps = range(T_or_generator)
    else:
        forward_steps = T_or_generator
    for step in forward_steps:
        step_feeds = utils.maybe_call(inner_objective_feed_dicts, step)
        self._history.append(sess.run(self.iteration, feed_dict=step_feeds))
        if forward_callback is not None:
            forward_callback(step, step_feeds, sess)

    # initialization of support variables (supports stochastic evaluation of outer objective via
    # global_step -> variable)
    # TODO (maybe tf bug or oddity): original author's note -- if some variable's initializer depends on a
    # placeholder, then the initializer of alpha SEEMS to depend on that placeholder too, as if the primary
    # variable should be reinitialized as well, although the primary variable is NOT actually
    # reinitialized. To be investigated further.
    reverse_feeds = utils.maybe_call(outer_objective_feed_dicts, utils.maybe_eval(global_step, sess))
    # the initializer feed dictionary is added as well because of the quirk described above
    extra_init_feeds = utils.maybe_call(initializer_feed_dict, utils.maybe_eval(global_step, sess))
    if extra_init_feeds is not None:
        reverse_feeds = utils.merge_dicts(reverse_feeds, extra_init_feeds)
    sess.run(self._reverse_initializer, feed_dict=reverse_feeds)

    n_entries = len(self._history)
    for pt, state_feeds in self._state_feed_dict_generator(reversed(self._history[:-1])):
        # this should be fine also for truncated reverse... but check again the index.
        # If T is an integer the history holds T + 1 entries, so the index starts at T - 1.
        step = n_entries - pt - 2
        if inner_objective_feed_dicts:
            inner_feeds = utils.maybe_call(inner_objective_feed_dicts, step)
        else:
            inner_feeds = {}
        sess.run(self._alpha_iter, feed_dict=utils.merge_dicts(state_feeds, inner_feeds))
def _opt_fd():
    """
    Builds the feed dictionary by merging the optimization-step feeds with the outer-objective feeds.

    Each supplier is resolved through `maybe_call` with the evaluated `self._global_step` only when it is
    truthy; otherwise an empty dictionary takes its place.

    :return: the result of `merge_dicts` applied to the two dictionaries
    """
    if optimization_step_feed_dict:
        # e.g. hyper-learning rate is a placeholder
        step_feeds = maybe_call(optimization_step_feed_dict, maybe_eval(self._global_step))
    else:
        step_feeds = {}

    if outer_objective_feed_dicts:
        # this is used in ForwardHG. In ReverseHG it shouldn't be needed, but it doesn't matter
        outer_feeds = maybe_call(outer_objective_feed_dicts, maybe_eval(self._global_step))
    else:
        outer_feeds = {}

    return merge_dicts(step_feeds, outer_feeds)
def redivide_data(datasets, partition_proportions=None, shuffle=False):
    """
    Stacks the examples of all the given datasets and splits them again into new partitions.
    Can also be used just to shuffle the examples.

    :param datasets: original datasets; data and targets are read with `get_data` and `get_targets`,
                     per-example information from the `sample_info` attribute and general information
                     from the `info` attribute
    :param partition_proportions: (optional, default None) a float or a list of fractions that must sum up
                                  to at most one; if the sum is less than one, one additional partition
                                  with proportion `1 - sum(partition_proportions)` is appended.
                                  If None (or empty) the proportions of examples found in `datasets`
                                  are retained
    :param shuffle: (optional, default False) if True shuffles the examples with `np.random.shuffle`
                    before partitioning
    :return: a list of `Dataset` of length equal to the (possibly augmented) partition proportions;
             all of them share the merged `info` dictionary of the originals
    """
    all_data = np.vstack([get_data(d) for d in datasets])
    all_labels = np.vstack([get_targets(d) for d in datasets])
    all_infos = np.concatenate([d.sample_info for d in datasets])

    N = all_data.shape[0]

    if partition_proportions:  # argument check
        # always work on a fresh list so that the caller's argument is never mutated
        partition_proportions = list([partition_proportions] if isinstance(partition_proportions, float)
                                     else partition_proportions)
        sum_proportions = sum(partition_proportions)
        # %s (not %d): the sum is a float and %d would truncate it, e.g. reporting 1.2 as "1"
        assert sum_proportions <= 1, "partition proportions must sum up to at most one: %s" % sum_proportions
        if sum_proportions < 1.:
            partition_proportions += [1. - sum_proportions]
    else:
        partition_proportions = [1. * get_data(d).shape[0] / N for d in datasets]

    if shuffle:
        permutation = np.arange(all_data.shape[0])
        np.random.shuffle(permutation)

        all_data = all_data[permutation]
        all_labels = np.array(all_labels[permutation])
        all_infos = np.array(all_infos[permutation])

    N = all_data.shape[0]
    assert N == all_labels.shape[0]

    # Cumulative partition boundaries: each boundary is the PREVIOUS boundary plus the size of the
    # partition. (Adding the sum of all previous boundaries instead gives wrong interior boundaries
    # as soon as there are four or more partitions.)
    calculated_partitions = [0]
    for prp in partition_proportions:
        calculated_partitions.append(calculated_partitions[-1] + int(N * prp))
    # the last partition absorbs the examples lost by the integer truncation above
    calculated_partitions[-1] = N

    print('datasets.redivide_data:, computed partitions numbers -',
          calculated_partitions, 'len all', N, end=' ')

    new_general_info_dict = merge_dicts(*[d.info for d in datasets])

    new_datasets = [
        Dataset(data=all_data[d1:d2], target=all_labels[d1:d2], sample_info=all_infos[d1:d2],
                info=new_general_info_dict)
        for d1, d2 in zip(calculated_partitions, calculated_partitions[1:])
    ]

    print('DONE')

    return new_datasets
def _training_supplier(step=0):
    """
    Builds the feed dictionary for the mini-batch associated with `step`.

    When `step` reaches `self.T` it is wrapped with a modulo, and a new visiting scheme is generated
    every time `step` is an exact multiple of `self.T`.

    :param step: (default 0) index of the step; also forwarded to `other_feeds` through
                 `utils.maybe_call`
    :return: the result of `utils.merge_dicts` applied to `{x: batch data, y: batch targets}` and to the
             resolved `other_feeds`
    """
    if step >= self.T:
        if step % self.T == 0:
            if self.epochs:
                print('WARNING: End of the training scheme reached. '
                      'Generating another scheme.', file=sys.stderr)
            self.generate_visiting_scheme()
        step %= self.T

    if self.training_schedule is None:
        self.generate_visiting_scheme()

    # indices of the examples of this mini-batch, taken from the visiting schedule
    # noinspection PyTypeChecker
    nb = self.training_schedule[step * self.batch_size:
                                min((step + 1) * self.batch_size, len(self.training_schedule))]

    bx = self.dataset.data[nb, :]
    by = self.dataset.target[nb, :]

    return utils.merge_dicts({x: bx, y: by}, utils.maybe_call(other_feeds, step))
def _state_feed_dict_generator(self, history, T_or_generator):
    """
    Lazily pairs step indices with the feed dictionaries that restore the corresponding history entries.

    :param history: iterable of history entries; each entry is itself zipped with the sorted
                    `self._optimizer_dicts`
    :param T_or_generator: resolved through `utils.solve_int_or_generator` to obtain the step indices
    :return: yields `(t, feed_dict)` where `feed_dict` is the merge (via `utils.merge_dicts`) of the
             `state_feed_dict` of every optimizer dict
    """
    steps = utils.solve_int_or_generator(T_or_generator)
    for t, entry in zip(steps, history):
        partial_feeds = []
        for opt_dict, values in zip(sorted(self._optimizer_dicts), entry):
            partial_feeds.append(opt_dict.state_feed_dict(values))
        yield t, utils.merge_dicts(*partial_feeds)
def run(self, T_or_generator, inner_objective_feed_dicts=None, outer_objective_feed_dicts=None,
        initializer_feed_dict=None, global_step=None, session=None, online=False):
    """
    Runs the forward pass, appending the result of every step to `self._history`, and then the reverse
    pass over the stored entries.

    :param T_or_generator: an `int`, in which case the forward loop iterates over
                            `range(T_or_generator)`, or an iterable of step indices
    :param inner_objective_feed_dicts: given to `utils.maybe_call` together with the step index to obtain
                                        the feed dictionary of each forward step; in the reverse pass it
                                        is used only when truthy, otherwise an empty dictionary is merged
    :param outer_objective_feed_dicts: given to `utils.maybe_call` together with the evaluated
                                        `global_step` to obtain the feed dictionary of the reverse
                                        initializer
    :param initializer_feed_dict: given to `utils.maybe_call` together with the evaluated `global_step`
                                   to obtain the feed dictionary of `self.initialization`
    :param global_step: evaluated through `utils.maybe_eval(global_step, session)`
    :param session: session used for every `run` call; when falsy `tf.get_default_session()` is used
    :param online: when True `self.initialization` is not run and no initial entry is stored
    """
    sess = session or tf.get_default_session()

    self._history.clear()
    if not online:
        init_feeds = utils.maybe_call(initializer_feed_dict, utils.maybe_eval(global_step, sess))
        self._history.append(sess.run(self.initialization, feed_dict=init_feeds))

    if isinstance(T_or_generator, int):
        forward_steps = range(T_or_generator)
    else:
        forward_steps = T_or_generator
    for step in forward_steps:
        step_feeds = utils.maybe_call(inner_objective_feed_dicts, step)
        self._history.append(sess.run(self.iteration, feed_dict=step_feeds))

    # initialization of support variables (supports stochastic evaluation of outer objective via
    # global_step -> variable)
    reverse_feeds = utils.maybe_call(outer_objective_feed_dicts, utils.maybe_eval(global_step, sess))
    sess.run(self._reverse_initializer, feed_dict=reverse_feeds)

    n_entries = len(self._history)
    for pt, state_feeds in self._state_feed_dict_generator(reversed(self._history[:-1])):
        # this should be fine also for truncated reverse... but check again the index.
        # If T is an integer the history holds T + 1 entries, so the index starts at T - 1.
        step = n_entries - pt - 2
        if inner_objective_feed_dicts:
            inner_feeds = utils.maybe_call(inner_objective_feed_dicts, step)
        else:
            inner_feeds = {}
        sess.run(self._alpha_iter, feed_dict=utils.merge_dicts(state_feeds, inner_feeds))
def state_feed_dict(self, his):
    """
    Builds the feed dictionary that maps each element of `self.state` to the matching value of `his`.

    :param his: indexable history entry; when it has one more value than `self._dynamics` (more
                precisely: a different length) its last value is additionally fed to `self.eta_k`
    :return: a dictionary keyed by the elements of `self.state` (and possibly by `self.eta_k`)
    """
    # an entry as long as the dynamics carries no extra value: this is the initialization step
    state_only = len(his) == len(self._dynamics)

    feeds = {}
    for k, v in enumerate(self.state):
        feeds[v] = his[k]
    if state_only:
        return feeds

    # considers also the extra trailing value
    return utils.merge_dicts(feeds, {self.eta_k: his[-1]})
def run(self, T_or_generator, inner_objective_feed_dicts=None, outer_objective_feed_dicts=None,
        initializer_feed_dict=None, global_step=None, session=None, online=False):
    """
    Runs the forward pass, collecting the result of every step into a fresh `self._history` list, and
    then the reverse pass over the stored entries.

    :param T_or_generator: an `int`, in which case the forward loop iterates over
                            `range(T_or_generator)`, or an iterable of step indices
    :param inner_objective_feed_dicts: given to `utils.maybe_call` together with the step index to obtain
                                        the feed dictionary of each forward step; in the reverse pass it
                                        is used only when truthy, otherwise an empty dictionary is merged
    :param outer_objective_feed_dicts: given to `utils.maybe_call` together with the evaluated
                                        `global_step` to obtain the feed dictionary of the reverse
                                        initializer
    :param initializer_feed_dict: given to `utils.maybe_call` together with the evaluated `global_step`
                                   to obtain the feed dictionary of `self.initialization`
    :param global_step: evaluated through `utils.maybe_eval(global_step, session)`
    :param session: session used for every `run` call; when falsy `tf.get_default_session()` is used
    :param online: when True `self.initialization` is not run and no initial entry is stored
    """
    sess = session or tf.get_default_session()

    # a new list is bound (the previous one is not cleared in place)
    self._history = []
    if not online:
        init_feeds = utils.maybe_call(initializer_feed_dict, utils.maybe_eval(global_step, sess))
        initial_entry = sess.run(self.initialization, feed_dict=init_feeds)
        self._history.append(initial_entry)

    forward_steps = range(T_or_generator) if isinstance(T_or_generator, int) else T_or_generator
    for step in forward_steps:
        step_feeds = utils.maybe_call(inner_objective_feed_dicts, step)
        step_entry = sess.run(self.iteration, feed_dict=step_feeds)
        self._history.append(step_entry)

    # initialization of support variables (supports stochastic evaluation of outer objective via
    # global_step -> variable)
    reverse_feeds = utils.maybe_call(outer_objective_feed_dicts, utils.maybe_eval(global_step, sess))
    sess.run(self._reverse_initializer, feed_dict=reverse_feeds)

    n_entries = len(self._history)
    for pt, state_feeds in self._state_feed_dict_generator(reversed(self._history[:-1])):
        # this should be fine also for truncated reverse... but check again the index.
        # If T is an integer the history holds T + 1 entries, so the index starts at T - 1.
        step = n_entries - pt - 2
        inner_feeds = {}
        if inner_objective_feed_dicts:
            inner_feeds = utils.maybe_call(inner_objective_feed_dicts, step)
        sess.run(self._alpha_iter, feed_dict=utils.merge_dicts(state_feeds, inner_feeds))
def run(self, T_or_generator, inner_objective_feed_dicts=None, outer_objective_feed_dicts=None,
        initializer_feed_dict=None, global_step=None, session=None, online=False, callback=None):
    """
    Runs the forward optimization steps and then minimizes every linear system in `self._lin_sys`.

    :param T_or_generator: resolved through `utils.solve_int_or_generator` to obtain the step indices of
                            the forward loop
    :param inner_objective_feed_dicts: passed through `utils.as_tuple_or_list`; the first element gives
                                        (via `utils.maybe_call` with the step index) the feed dictionary
                                        of each forward step, the last element is called with `-1` to
                                        obtain the feed dictionary used for the linear systems
    :param outer_objective_feed_dicts: given to `utils.maybe_call` together with the evaluated
                                        `global_step`; the result is merged into the feed dictionary used
                                        for the linear systems
    :param initializer_feed_dict: given to `utils.maybe_call` together with the evaluated `global_step`
                                   to obtain the feed dictionary for `self._run_batch_initialization`
    :param global_step: evaluated through `utils.maybe_eval(global_step, session)`
    :param session: session used throughout; when falsy `tf.get_default_session()` is used
    :param online: when True the batch initialization is skipped
    :param callback: invoked through `utils.maybe_call(callback, t, feed_dict, session)` after each
                     forward step
    """
    sess = session or tf.get_default_session()

    inner_objective_feed_dicts = utils.as_tuple_or_list(inner_objective_feed_dicts)
    if not online:
        init_feeds = utils.maybe_call(initializer_feed_dict, utils.maybe_eval(global_step, sess))
        self._run_batch_initialization(sess, init_feeds)

    forward_supplier = inner_objective_feed_dicts[0]
    for step in utils.solve_int_or_generator(T_or_generator):
        step_feeds = utils.maybe_call(forward_supplier, step)
        self._forward_step(sess, step_feeds)
        utils.maybe_call(callback, step, step_feeds, sess)

    # end of optimization. Solve linear systems.
    # decreasing tolerance (seq.)
    tolerance_value = utils.maybe_call(self.tolerance, utils.maybe_eval(global_step, sess))

    # feed dictionaries (could... in theory, implement stochastic solution of this linear system...)
    inner_feeds = utils.maybe_call(inner_objective_feed_dicts[-1], -1)
    outer_feeds = utils.maybe_call(outer_objective_feed_dicts, utils.maybe_eval(global_step, sess))
    system_feeds = utils.merge_dicts(inner_feeds, outer_feeds)

    for make_system in self._lin_sys:
        # implicitly warm restarts with previously found q
        solver = make_system(tolerance_value)
        solver.minimize(sess, system_feeds)
def _state_feed_dict_generator(self, history):
    """
    Lazily enumerates the history entries together with the feed dictionaries that restore them.

    :param history: iterable of history entries; each entry is itself zipped with the sorted
                    `self._optimizer_dicts`
    :return: yields `(t, feed_dict)` where `t` is the position of the entry in `history` and `feed_dict`
             is the merge (via `utils.merge_dicts`) of the `state_feed_dict` of every optimizer dict
    """
    for t, entry in enumerate(history):
        partial_feeds = []
        for opt_dict, values in zip(sorted(self._optimizer_dicts), entry):
            partial_feeds.append(opt_dict.state_feed_dict(values))
        yield t, utils.merge_dicts(*partial_feeds)
def _supplier(step=0):
    """
    Builds the feed dictionary that feeds the whole `self.data` and `self.target`.

    :param step: (default 0) forwarded to `other_feeds` through `utils.maybe_call`
    :return: the result of `utils.merge_dicts` applied to `{x: self.data, y: self.target}` and to the
             resolved `other_feeds`
    """
    base_feeds = {x: self.data, y: self.target}
    extra_feeds = utils.maybe_call(other_feeds, step)
    return utils.merge_dicts(base_feeds, extra_feeds)