def define_summaries(graph, config, cleanups):
    """Build summary ops and expose the open-loop prediction of one sequence.

    Args:
        graph: Model graph container. This function reads its `data`,
            `embedded`, `heads`, `objectives`, `step`, `global_step`, `cell`
            and `phase` entries.
        config: Configuration object. This function reads `postprocess_fn`,
            `train_dir`, `debug`, `open_loop_context` and `test_collects`.
        cleanups: Passed through unchanged to `utility.simulate_episodes`.

    Returns:
        A tuple `(summaries, score, prediction, graph_slim)` where
        `summaries` is the merged summary op, `score` is the mean of the
        simulated returns with a leading axis added, `prediction` is the
        second value returned by `summary.prediction_summaries`, and
        `graph_slim` is a dict holding element 0 of the `action`, `image`,
        `position`, `velocity`, `return` and `reward` entries of
        `graph.data` plus `predicted_image`, element 0 of the mode of the
        open-loop image distribution.
    """
    summaries = []
    plot_summaries = []  # Control dependencies for non thread-safe matplot.
    length = graph.data['length']
    # True wherever the index along axis 1 is smaller than the sequence length.
    mask = tf.range(graph.embedded.shape[1].value)[None, :] < length[:, None]
    heads = graph.heads.copy()
    last_time = tf.Variable(lambda: tf.timestamp(), trainable=False)
    last_step = tf.Variable(lambda: 0.0, trainable=False, dtype=tf.float64)

    def transform(dist):
        # Post-process the mean and clip it to [0, 1] before re-wrapping it.
        mean = config.postprocess_fn(dist.mean())
        mean = tf.clip_by_value(mean, 0.0, 1.0)
        return tfd.Independent(tfd.Normal(mean, 1.0), len(dist.event_shape))

    # Only the local copy of the heads gets the transformed image head.
    heads.unlock()
    heads['image'] = lambda features: transform(graph.heads['image'](features))
    heads.lock()

    with tf.variable_scope('general'):
        summaries += summary.data_summaries(graph.data, config.postprocess_fn)
        summaries += summary.dataset_summaries(config.train_dir)
        summaries += summary.objective_summaries(graph.objectives)
        summaries.append(tf.summary.scalar('step', graph.step))
        new_time, new_step = tf.timestamp(), tf.cast(
            graph.global_step, tf.float64)
        delta_time, delta_step = new_time - last_time, new_step - last_step
        # The deltas must be computed before the variables are overwritten,
        # and the throughput summaries are emitted after the assignment.
        with tf.control_dependencies([delta_time, delta_step]):
            assign_ops = [
                last_time.assign(new_time), last_step.assign(new_step)]
            with tf.control_dependencies(assign_ops):
                summaries.append(tf.summary.scalar(
                    'steps_per_second', delta_step / delta_time))
                summaries.append(tf.summary.scalar(
                    'seconds_per_step', delta_time / delta_step))

    with tf.variable_scope('closedloop'):
        prior, posterior = tools.unroll.closed_loop(
            graph.cell, graph.embedded, graph.data['action'], config.debug)
        summaries += summary.state_summaries(
            graph.cell, prior, posterior, mask)
        with tf.variable_scope('prior'):
            prior_features = graph.cell.features_from_state(prior)
            prior_dists = {
                name: head(prior_features)
                for name, head in heads.items()}
            summaries += summary.dist_summaries(prior_dists, graph.data, mask)
            summaries += summary.image_summaries(
                prior_dists['image'],
                config.postprocess_fn(graph.data['image']))
        with tf.variable_scope('posterior'):
            posterior_features = graph.cell.features_from_state(posterior)
            posterior_dists = {
                name: head(posterior_features)
                for name, head in heads.items()}
            summaries += summary.dist_summaries(
                posterior_dists, graph.data, mask)
            summaries += summary.image_summaries(
                posterior_dists['image'],
                config.postprocess_fn(graph.data['image']))

    with tf.variable_scope('openloop'):
        state = tools.unroll.open_loop(
            graph.cell, graph.embedded, graph.data['action'],
            config.open_loop_context, config.debug)
        state_features = graph.cell.features_from_state(state)
        state_dists = {
            name: head(state_features) for name, head in heads.items()}
        summaries += summary.dist_summaries(state_dists, graph.data, mask)
        summaries += summary.image_summaries(
            state_dists['image'], config.postprocess_fn(graph.data['image']))
        summaries += summary.state_summaries(
            graph.cell, state, posterior, mask)
        with tf.control_dependencies(plot_summaries):
            plot_summary, prediction = summary.prediction_summaries(
                state_dists, graph.data, state)
            plot_summaries += plot_summary
            summaries += plot_summary
        tf.logging.info('Pred: %s', prediction)

    with tf.variable_scope('simulation'):
        sim_returns = []
        for name, params in config.test_collects.items():
            # These are expensive and equivalent for train and test phases,
            # so only do one of them.
            sim_summary, sim_return = tf.cond(
                tf.equal(graph.phase, 'test'),
                lambda: utility.simulate_episodes(
                    config, params, graph, cleanups,
                    expensive_summaries=False, gif_summary=True, name=name),
                lambda: ('', 0.0),
                name='should_simulate_' + params.task.name)
            summaries.append(sim_summary)
            sim_returns.append(sim_return)

    summaries = tf.summary.merge(summaries)
    score = tf.reduce_mean(sim_returns)[None]

    # Build the predicted image once and reuse it for logging and the output.
    predicted_image = state_dists['image'].mode()[0]
    tf.logging.info("state_dists['image']: %s", predicted_image)

    # Element 0 of each selected data entry, in a fixed key order.
    slim_keys = ('action', 'image', 'position', 'velocity', 'return', 'reward')
    graph_slim = {key: graph.data[key][0] for key in slim_keys}
    graph_slim['predicted_image'] = predicted_image
    return summaries, score, prediction, graph_slim
def define_testmodel(data, trainer, config, logdir):
    """Build the evaluation-only compute graph and return its score tensor.

    The network blocks (cell, encoder, heads) are instantiated, the encoder
    is applied to `data`, and episodes are simulated for every entry of
    `config.test_collects` when the trainer phase equals 'test'. The
    objective, optimizer, data-collection and logged-summary stages are not
    built by this function.

    Args:
        data: Mapping of input tensors; `data['action']` and, for head keys
            present in it, `data[key]` are read here. It is also passed to
            the encoder as a whole.
        trainer: Object providing `step`, `global_step` and `phase`.
        config: Configuration object. This function reads `cell`, `encoder`,
            `heads`, `debug` and `test_collects`.
        logdir: Not used directly; it is captured in the `graph` snapshot.

    Returns:
        A tuple `(score, summaries, cleanups)`. `score` is the value
        produced for the last entry of `config.test_collects` (a constant
        0.0 if that mapping is empty), `summaries` is always an empty list
        here, and `cleanups` is the list handed to
        `utility.simulate_episodes`.
    """
    tf.logging.info('Build TensorFlow compute graph.')
    # NOTE: every local defined up to the `locals()` call below becomes an
    # entry of `graph`, so their names must not be changed or removed.
    dependencies = []
    cleanups = []
    step = trainer.step
    global_step = trainer.global_step
    phase = trainer.phase

    # Instantiate network blocks.
    cell = config.cell()
    kwargs = dict(create_scope_now_=True)
    encoder = tf.make_template('encoder', config.encoder, **kwargs)
    heads = tools.AttrDict(_unlocked=True)
    dummy_features = cell.features_from_state(cell.zero_state(1, tf.float32))
    for key, head in config.heads.items():
        name = 'head_{}'.format(key)
        kwargs = dict(create_scope_now_=True)
        if key in data:
            kwargs['data_shape'] = data[key].shape[2:].as_list()
        elif key == 'action_target':
            kwargs['data_shape'] = data['action'].shape[2:].as_list()
        heads[key] = tf.make_template(name, head, **kwargs)
        heads[key](dummy_features)  # Initialize weights.
    tf.logging.info('Cell: %s, encoder: %s', cell, encoder)

    # Apply the model.
    embedded = encoder(data)
    with tf.control_dependencies(dependencies):
        embedded = tf.identity(embedded)
    graph = tools.AttrDict(locals())

    # The results below are not used further in this function.
    # NOTE(review): presumably these calls are kept so the unroll and the
    # reward head are built outside of tf.cond - confirm before removing.
    prior, posterior = tools.unroll.closed_loop(
        cell, embedded, data['action'], config.debug)
    features = graph.cell.features_from_state(posterior)
    pred = heads['reward'](features)

    summaries = []
    score = None
    with tf.variable_scope('simulation'):
        for name, params in config.test_collects.items():
            # These are expensive and equivalent for train and test phases,
            # so only do one of them.
            tf.logging.info('Test collect %s: %s', name, params)
            # The simulation summary is discarded; only the score is kept,
            # and each iteration overwrites the previous score.
            sim_summary, score = tf.cond(
                tf.equal(graph.phase, 'test'),
                lambda: utility.simulate_episodes(
                    config, params, graph, cleanups,
                    expensive_summaries=True, gif_summary=True, name=name),
                lambda: ('', 0.0),
                name='should_simulate_' + params.task.name)

    if score is None:
        # Without any test collect there is nothing to simulate; fall back to
        # the same placeholder value the non-test branch of the cond uses
        # instead of failing on an unbound `score`.
        tf.logging.warning(
            'config.test_collects is empty; returning a constant score.')
        score = tf.constant(0.0)

    with tf.control_dependencies(dependencies):
        score = tf.identity(score)
    return score, summaries, cleanups
def define_summaries(graph, config, cleanups):
    """Assemble all summary ops of the model into one merged summary.

    Args:
        graph: Model graph container; its `data`, `embedded`, `heads`,
            `objectives`, `step`, `global_step`, `cell` and `phase` entries
            are read.
        config: Configuration object; `postprocess_fn`, `train_dir`, `debug`,
            `open_loop_context` and `test_collects` are read.
        cleanups: Forwarded unchanged to `utility.simulate_episodes`.

    Returns:
        A pair `(summaries, score)`: the merged summary op and the mean of
        the simulated returns with a leading axis added.
    """
    v1 = tf.compat.v1
    collected = []
    plot_deps = []  # Control dependencies for non thread-safe matplot.
    seq_len = graph.data['length']
    # True wherever the index along axis 1 is below the sequence length.
    mask = tf.range(graph.embedded.shape[1])[None, :] < seq_len[:, None]
    heads = graph.heads.copy()
    last_time = tf.Variable(lambda: tf.timestamp(), trainable=False)
    last_step = tf.Variable(lambda: 0.0, trainable=False, dtype=tf.float64)

    def _displayable(dist):
        # Post-processed mean, clipped to [0, 1], wrapped as a distribution.
        mean = tf.clip_by_value(config.postprocess_fn(dist.mean()), 0.0, 1.0)
        return tfd.Independent(tfd.Normal(mean, 1.0), len(dist.event_shape))

    def _apply_heads(features):
        # Evaluate every head on the same feature tensor.
        return {name: head(features) for name, head in heads.items()}

    def _true_image():
        # Built anew at each call site, exactly where it is needed.
        return config.postprocess_fn(graph.data['image'])

    heads.unlock()
    heads['image'] = (
        lambda features: _displayable(graph.heads['image'](features)))
    heads.lock()

    with v1.variable_scope('general'):
        collected.extend(
            summary.data_summaries(graph.data, config.postprocess_fn))
        collected.extend(summary.dataset_summaries(config.train_dir))
        collected.extend(summary.objective_summaries(graph.objectives))
        collected.append(v1.summary.scalar('step', graph.step))
        new_time = tf.timestamp()
        new_step = tf.cast(graph.global_step, tf.float64)
        delta_time = new_time - last_time
        delta_step = new_step - last_step
        # Deltas first, then the variable updates, then the rate summaries.
        with tf.control_dependencies([delta_time, delta_step]):
            updates = [last_time.assign(new_time), last_step.assign(new_step)]
            with tf.control_dependencies(updates):
                collected.append(v1.summary.scalar(
                    'steps_per_second', delta_step / delta_time))
                collected.append(v1.summary.scalar(
                    'seconds_per_step', delta_time / delta_step))

    with v1.variable_scope('closedloop'):
        prior, posterior = tools.unroll.closed_loop(
            graph.cell, graph.embedded, graph.data['action'], config.debug)
        collected.extend(
            summary.state_summaries(graph.cell, prior, posterior, mask))
        with v1.variable_scope('prior'):
            prior_dists = _apply_heads(graph.cell.features_from_state(prior))
            collected.extend(
                summary.dist_summaries(prior_dists, graph.data, mask))
            collected.extend(
                summary.image_summaries(prior_dists['image'], _true_image()))
        with v1.variable_scope('posterior'):
            posterior_dists = _apply_heads(
                graph.cell.features_from_state(posterior))
            collected.extend(
                summary.dist_summaries(posterior_dists, graph.data, mask))
            collected.extend(summary.image_summaries(
                posterior_dists['image'], _true_image()))

    with v1.variable_scope('openloop'):
        state = tools.unroll.open_loop(
            graph.cell, graph.embedded, graph.data['action'],
            config.open_loop_context, config.debug)
        state_dists = _apply_heads(graph.cell.features_from_state(state))
        collected.extend(summary.dist_summaries(state_dists, graph.data, mask))
        collected.extend(
            summary.image_summaries(state_dists['image'], _true_image()))
        collected.extend(
            summary.state_summaries(graph.cell, state, posterior, mask))
        with tf.control_dependencies(plot_deps):
            plots = summary.prediction_summaries(
                state_dists, graph.data, state)
            plot_deps += plots
            collected += plots

    with v1.variable_scope('simulation'):
        sim_returns = []
        for collect_name, collect_params in config.test_collects.items():
            # These are expensive and equivalent for train and test phases,
            # so only do one of them.
            episode_summary, episode_return = v1.cond(
                v1.equal(graph.phase, 'test'),
                lambda: utility.simulate_episodes(
                    config, collect_params, graph, cleanups,
                    expensive_summaries=False, gif_summary=True,
                    name=collect_name),
                lambda: ('', 0.0),
                name='should_simulate_' + collect_params.task.name)
            collected.append(episode_summary)
            sim_returns.append(episode_return)

    return v1.summary.merge(collected), tf.reduce_mean(sim_returns)[None]
def define_summaries(graph, config, cleanups):
    """Assemble the model, CPC and optional robustness summaries.

    Args:
        graph: Model graph container; its `data`, `embedded`, `heads`,
            `objectives`, `step`, `global_step`, `cpc_logs`, `cell`,
            `encoder` and `phase` entries are read.
        config: Configuration object; `cpc`, `postprocess_fn`,
            `preprocess_fn`, `train_dir`, `debug`, `open_loop_context`,
            `test_collects`, `robustness_summary` and `tasks` are read.
        cleanups: Forwarded unchanged to `utility.simulate_episodes`.

    Returns:
        A pair `(summaries, score)`: the merged summary op and the mean of
        the simulated returns with a leading axis added.
    """
    collected = []
    plot_deps = []  # Control dependencies for non thread-safe matplot.
    seq_len = graph.data['length']
    # True wherever the index along axis 1 is below the sequence length.
    mask = tf.range(graph.embedded.shape[1].value)[None, :] < seq_len[:, None]
    heads = graph.heads.copy()
    last_time = tf.Variable(lambda: tf.timestamp(), trainable=False)
    last_step = tf.Variable(lambda: 0.0, trainable=False, dtype=tf.float64)

    def _displayable(dist):
        # Post-processed mean, clipped to [0, 1], wrapped as a distribution.
        mean = tf.clip_by_value(config.postprocess_fn(dist.mean()), 0.0, 1.0)
        return tfd.Independent(tfd.Normal(mean, 1.0), len(dist.event_shape))

    def _apply_heads(features):
        # Evaluate every head on the same feature tensor.
        return {name: head(features) for name, head in heads.items()}

    def _true_image():
        # Built anew at each call site, exactly where it is needed.
        return config.postprocess_fn(graph.data['image'])

    def _stack_renders(env, group_count, group_size, observe):
        # Draw `group_count` random states and render each `group_size`
        # times, calling `observe(attempt)` before every render. The frames
        # are concatenated along axis 0 in rendering order.
        frames = tf.zeros(shape=(0, 32, 32, 3))
        for _ in range(group_count):
            pose = np.random.uniform(
                low=[-1.8, -np.pi], high=[1.8, np.pi], size=(2, ))
            for attempt in range(group_size):
                env._physics.reset_from_obs(pose)
                observe(attempt)
                frame = config.preprocess_fn(env._render_image())
                frames = tf.concat([frames, frame[None]], axis=0)
        return frames

    if not config.cpc:
        # The image head is only overridden when CPC is disabled.
        heads.unlock()
        heads['image'] = (
            lambda features: _displayable(graph.heads['image'](features)))
        heads.lock()

    with tf.variable_scope('general'):
        collected.extend(
            summary.data_summaries(graph.data, config.postprocess_fn))
        collected.extend(summary.dataset_summaries(config.train_dir))
        collected.extend(summary.objective_summaries(graph.objectives))
        collected.append(tf.summary.scalar('step', graph.step))
        new_time = tf.timestamp()
        new_step = tf.cast(graph.global_step, tf.float64)
        delta_time = new_time - last_time
        delta_step = new_step - last_step
        # Deltas first, then the variable updates, then the rate summaries.
        with tf.control_dependencies([delta_time, delta_step]):
            updates = [last_time.assign(new_time), last_step.assign(new_step)]
            with tf.control_dependencies(updates):
                collected.append(tf.summary.scalar(
                    'steps_per_second', delta_step / delta_time))
                collected.append(tf.summary.scalar(
                    'seconds_per_step', delta_time / delta_step))

    with tf.variable_scope('embedding_magnitude'):
        collected.extend(summary.magnitude_summary(graph.embedded, 'emb'))

    with tf.variable_scope('cpc'):
        for tag, value in graph.cpc_logs.items():
            collected.append(tf.summary.scalar(tag, value))

    with tf.variable_scope('closedloop'):
        prior, posterior = tools.unroll.closed_loop(
            graph.cell, graph.embedded, graph.data['action'], config.debug)
        collected.extend(
            summary.state_summaries(graph.cell, prior, posterior, mask))
        with tf.variable_scope('prior'):
            prior_dists = _apply_heads(graph.cell.features_from_state(prior))
            collected.extend(
                summary.dist_summaries(prior_dists, graph.data, mask))
            if not config.cpc:
                collected.extend(summary.image_summaries(
                    prior_dists['image'], _true_image()))
            with tf.variable_scope('magnitude'):
                collected.extend(
                    summary.magnitude_summary(prior['sample'], 'sample'))
                prior_step = prior['sample'][:, 1:] - prior['sample'][:, :-1]
                collected.extend(summary.magnitude_summary(prior_step, 'diff'))
        with tf.variable_scope('posterior'):
            posterior_dists = _apply_heads(
                graph.cell.features_from_state(posterior))
            collected.extend(
                summary.dist_summaries(posterior_dists, graph.data, mask))
            with tf.variable_scope('magnitude'):
                collected.extend(
                    summary.magnitude_summary(posterior['sample'], 'sample'))
                posterior_step = (
                    posterior['sample'][:, 1:] - posterior['sample'][:, :-1])
                collected.extend(
                    summary.magnitude_summary(posterior_step, 'diff'))
            if not config.cpc:
                collected.extend(summary.image_summaries(
                    posterior_dists['image'], _true_image()))
        with tf.variable_scope('mixed'):
            with tf.variable_scope('magnitude'):
                # Prior samples shifted by one step against posterior samples.
                mixed_step = (
                    prior['sample'][:, 1:] - posterior['sample'][:, :-1])
                collected.extend(summary.magnitude_summary(mixed_step, 'diff'))

    with tf.variable_scope('openloop'):
        state = tools.unroll.open_loop(
            graph.cell, graph.embedded, graph.data['action'],
            config.open_loop_context, config.debug)
        state_dists = _apply_heads(graph.cell.features_from_state(state))
        collected.extend(summary.dist_summaries(state_dists, graph.data, mask))
        with tf.variable_scope('magnitude'):
            collected.extend(
                summary.magnitude_summary(state['sample'], 'sample'))
            # Unlike the closed-loop scopes, the absolute difference is used.
            state_step = tf.abs(
                state['sample'][:, 1:] - state['sample'][:, :-1])
            collected.extend(summary.magnitude_summary(state_step, 'diff'))
        if not config.cpc:
            collected.extend(
                summary.image_summaries(state_dists['image'], _true_image()))
        collected.extend(
            summary.state_summaries(graph.cell, state, posterior, mask))
        with tf.control_dependencies(plot_deps):
            plots = summary.prediction_summaries(
                state_dists, graph.data, state)
            plot_deps += plots
            collected += plots

    with tf.variable_scope('simulation'):
        sim_returns = []
        for collect_name, collect_params in config.test_collects.items():
            # These are expensive and equivalent for train and test phases,
            # so only do one of them.
            episode_summary, episode_return = tf.cond(
                tf.equal(graph.phase, 'test'),
                lambda: utility.simulate_episodes(
                    config, collect_params, graph, cleanups,
                    expensive_summaries=False, gif_summary=True,
                    name=collect_name),
                lambda: ('', 0.0),
                name='should_simulate_' + collect_params.task.name)
            collected.append(episode_summary)
            sim_returns.append(episode_return)

    if config.robustness_summary:
        with tf.variable_scope('robustness'):
            env = config.tasks[0].env_ctor()
            num_states = 20
            num_tries = 5

            # Several renders of each random state, encoded and grouped by
            # state along axis 0.
            frames = _stack_renders(
                env, num_states, num_tries,
                lambda attempt: env.task.get_observation(env._physics))
            codes = tf.reshape(
                graph.encoder(frames), shape=(num_states, num_tries, -1))
            # Spread of the embeddings around their per-state mean.
            group_mean = tf.reduce_mean(codes, axis=1, keepdims=True)
            variance_within = tf.reduce_mean(
                tf.reduce_sum(tf.square(codes - group_mean), axis=-1))
            # Spread of the embeddings around the overall mean.
            total_mean = tf.reduce_mean(codes, axis=[0, 1], keepdims=True)
            total_variance = tf.reduce_mean(
                tf.reduce_sum(tf.square(codes - total_mean), axis=-1))
            collected.append(
                tf.summary.scalar('variance_within', variance_within))
            collected.append(
                tf.summary.scalar('total_variance', total_variance))
            collected.append(tf.summary.scalar(
                'variance_ratio', variance_within / total_variance))

            # Two renders per random state: the first observation is taken
            # with `no_dis=True`, the second with `no_dis=False`.
            frames = _stack_renders(
                env, num_states, 2,
                lambda attempt: env.task.get_observation(
                    env._physics, no_dis=(attempt == 0)))
            codes = tf.reshape(
                graph.encoder(frames), shape=(num_states, 2, -1))
            # Distance between the two embeddings of each state, relative to
            # the norm of the embedding at index 1.
            difference_norm = tf.norm(codes[:, 0] - codes[:, 1], axis=-1)
            first_norm = tf.norm(codes[:, 1], axis=-1)
            ratio = tf.reduce_mean(difference_norm / first_norm)
            collected.append(tf.summary.scalar(
                'difference_norm', tf.reduce_mean(difference_norm)))
            collected.append(
                tf.summary.scalar('first_norm', tf.reduce_mean(first_norm)))
            collected.append(tf.summary.scalar('ratio_of_norm', ratio))

    return tf.summary.merge(collected), tf.reduce_mean(sim_returns)[None]