def __init__(self, config, gpu_id=0, ngpu=1, logger=None):
    """Build the agent and the policy and set up bookkeeping for sampling.

    :param config: hyperparameter dict. ``config['agent']`` and
        ``config['policy']`` must each hold a ``'type'`` entry that is called
        to construct the object. Optional ``'record_saver'`` and ``'counter'``
        entries are popped from the dict.
    :param gpu_id: written to ``config['agent']['gpu_id']`` and passed to the
        policy constructor
    :param ngpu: passed to the policy constructor
    :param logger: object exposing ``log``; a printing ``Logger`` is created
        when None
    """
    self._hyperparams = config
    self.agent = config['agent']['type'](config['agent'])
    self.agentparams = config['agent']
    self.policyparams = config['policy']

    # Identity check: a logger that overrides __eq__ must not be replaced.
    if logger is None:
        self.logger = Logger(printout=True)
    else:
        self.logger = logger
    self.logger.log('started sim')

    self.agentparams['gpu_id'] = gpu_id
    self.policy = config['policy']['type'](
        self.agent._hyperparams, config['policy'], gpu_id, ngpu)

    self._record_queue = config.pop('record_saver', None)
    self._counter = config.pop('counter', None)

    self.trajectory_list = []
    self.im_score_list = []

    # Best-effort cleanup: a missing/None 'image_dir' entry or a path that
    # cannot be removed is ignored, but unrelated errors (and
    # KeyboardInterrupt/SystemExit) are no longer swallowed.
    try:
        os.remove(self._hyperparams['agent']['image_dir'])
    except (KeyError, TypeError, OSError):
        pass

    self.task_mode = 'train'
def __init__(self, conf, data_collectors=None, todo_ids=None, printout=False, mode='train'):
    """Create an empty replay buffer configured from ``conf``.

    :param conf: configuration mapping; ``'logging_dir'``, ``'onpolconf'``
        and ``'batch_size'`` are read, ``'agent'`` is stored as
        ``self.agentparams`` only when present
    :param data_collectors: stored unchanged on the instance
    :param todo_ids: stored unchanged on the instance
    :param printout: forwarded to the ``Logger``
    :param mode: key selecting the buffer size from
        ``conf['onpolconf']['replay_size']``
    """
    log_name = 'replay_log.txt'
    self.logger = Logger(conf['logging_dir'], log_name, printout=printout)

    self.conf = conf
    onpol_settings = conf['onpolconf']
    self.onpolconf = onpol_settings
    if 'agent' in conf:
        self.agentparams = conf['agent']

    # Buffer state starts out empty.
    self.ring_buffer = []
    self.mode = mode
    self.maxsize = onpol_settings['replay_size'][mode]
    self.batch_size = conf['batch_size']

    self.data_collectors, self.todo_ids = data_collectors, todo_ids
    self.scores = []
    self.num_updates = 0

    self.logger.log('init Replay buffer')
    # Wall-clock reference taken at construction time.
    self.tstart = time.time()
class Sim(object):
    """ Main class to run algorithms and experiments.

    Builds an agent and a policy from the config, samples trajectories and
    either writes them to disk as raw images plus pickles or pushes them to
    a record queue.
    """

    def __init__(self, config, gpu_id=0, ngpu=1, logger=None):
        """Build the agent and the policy and set up bookkeeping.

        :param config: hyperparameter dict. ``config['agent']`` and
            ``config['policy']`` must each hold a ``'type'`` entry that is
            called to construct the object. Optional ``'record_saver'`` and
            ``'counter'`` entries are popped from the dict.
        :param gpu_id: written to ``config['agent']['gpu_id']`` and passed to
            the policy constructor
        :param ngpu: passed to the policy constructor
        :param logger: object exposing ``log``; a printing ``Logger`` is
            created when None
        """
        self._hyperparams = config
        self.agent = config['agent']['type'](config['agent'])
        self.agentparams = config['agent']
        self.policyparams = config['policy']

        # Identity check: a logger that overrides __eq__ must not be replaced.
        if logger is None:
            self.logger = Logger(printout=True)
        else:
            self.logger = logger
        self.logger.log('started sim')

        self.agentparams['gpu_id'] = gpu_id
        self.policy = config['policy']['type'](
            self.agent._hyperparams, config['policy'], gpu_id, ngpu)

        self._record_queue = config.pop('record_saver', None)
        self._counter = config.pop('counter', None)

        self.trajectory_list = []
        self.im_score_list = []

        # Best-effort cleanup: a missing/None 'image_dir' entry or a path
        # that cannot be removed is ignored, but unrelated errors (and
        # KeyboardInterrupt/SystemExit) are no longer swallowed.
        try:
            os.remove(self._hyperparams['agent']['image_dir'])
        except (KeyError, TypeError, OSError):
            pass

        self.task_mode = 'train'

    def run(self):
        """Sample trajectories.

        Without a counter, indices ``start_index`` .. ``end_index``
        (inclusive) are sampled. With a counter, indices are drawn from
        ``counter.ret_increment()`` until one reaches ``ntraj``.
        """
        if self._counter is None:
            first = self._hyperparams['start_index']
            last = self._hyperparams['end_index']
            for i in range(first, last + 1):
                self.take_sample(i)
        else:
            itr = self._counter.ret_increment()
            while itr < self._hyperparams['ntraj']:
                print('taking sample {} of {}'.format(
                    itr, self._hyperparams['ntraj']))
                self.take_sample(itr)
                itr = self._counter.ret_increment()

    def take_sample(self, sample_index):
        """Reset the policy, sample one trajectory and optionally save it.

        :param sample_index: index passed to the agent and used for saving
        :return: the agent data returned by ``agent.sample``
        """
        self.policy.reset()
        agent_data, obs_dict, policy_out = self.agent.sample(
            self.policy, sample_index)
        # Saving is on unless 'save_data' is explicitly disabled.
        if self._hyperparams.get('save_data', True):
            self.save_data(sample_index, agent_data, obs_dict, policy_out)
        return agent_data

    def save_data(self, itr, agent_data, obs_dict, policy_outputs):
        """Store one trajectory as raw data or in the record queue.

        :raises ValueError: if raw saving is disabled and no record queue
            was configured
        """
        if self._hyperparams.get('save_only_good', False) \
                and not agent_data['goal_reached']:
            return

        if self._hyperparams.get('save_raw_images', False):
            self._save_raw_data(itr, agent_data, obs_dict, policy_outputs)
        elif self._record_queue is not None:
            self._record_queue.put((agent_data, obs_dict, policy_outputs))
        else:
            raise ValueError('Saving neither raw data nor records')

    def _save_raw_data(self, itr, agent_data, obs_dict, policy_outputs):
        """Write one trajectory to ``<data_dir>/<task_mode>/traj_group<g>/traj<itr>``.

        The data directory is ``$RESULT_DIR/data`` when that environment
        variable is set, otherwise ``agentparams['data_save_dir']``. An
        existing trajectory folder is deleted first. ``'images'`` is popped
        from ``obs_dict`` (the caller's dict is modified) and written as one
        PNG per time step and camera; the remaining data is pickled.
        """
        if 'RESULT_DIR' in os.environ:
            data_save_dir = os.environ['RESULT_DIR'] + '/data'
        else:
            data_save_dir = self.agentparams['data_save_dir']

        ngroup = self._hyperparams.get('ngroup', 1000)
        igrp = itr // ngroup
        group_folder = data_save_dir + '/{}/traj_group{}'.format(
            self.task_mode, igrp)
        if not os.path.exists(group_folder):
            os.makedirs(group_folder)

        traj_folder = group_folder + '/traj{}'.format(itr)
        if os.path.exists(traj_folder):
            print('trajectory folder {} already exists, deleting the folder'.
                  format(traj_folder))
            shutil.rmtree(traj_folder)

        os.makedirs(traj_folder)
        print('writing: ', traj_folder)

        if 'images' in obs_dict:
            images = obs_dict.pop('images')
            T, n_cams = images.shape[:2]
            for i in range(n_cams):
                os.mkdir(traj_folder + '/images{}'.format(i))
            for t in range(T):
                for i in range(n_cams):
                    # The last axis is reversed before handing the image to
                    # cv2 (presumably RGB -> BGR; confirm with the agent).
                    cv2.imwrite(
                        '{}/images{}/im_{}.png'.format(traj_folder, i, t),
                        images[t, i, :, :, ::-1])

        pickles = (
            ('agent_data.pkl', agent_data),
            ('obs_dict.pkl', obs_dict),
            ('policy_out.pkl', policy_outputs),
        )
        for fname, payload in pickles:
            with open('{}/{}'.format(traj_folder, fname), 'wb') as f:
                pkl.dump(payload, f)
def __init__(self, ag_params, policyparams):
    """Set up the CEM controller state from agent and policy parameters.

    :param ag_params: agent parameter mapping; ``'adim'`` and ``'sdim'`` are
        read, ``'logging_dir'``/``'gpu_id'`` select the log file and the
        presence of ``'sawyer'`` enables the ROS publishers
    :param policyparams: overrides applied on top of the default
        hyperparameters
    """
    self._hp = self._default_hparams()
    self.override_defaults(policyparams)
    self.agentparams = ag_params
    hp = self._hp

    if 'logging_dir' in ag_params:
        log_file = 'cem{}log.txt'.format(ag_params['gpu_id'])
        self.logger = Logger(ag_params['logging_dir'], log_file)
    else:
        self.logger = Logger(printout=True)
    self.logger.log('init CEM controller')

    self.t = None

    # A truthy 'verbose' turns logging on; an int value doubles as frequency.
    self.verbose = bool(hp.verbose)
    if hp.verbose and isinstance(hp.verbose, int):
        self.verbose_freq = hp.verbose
    else:
        self.verbose_freq = 1

    self.niter = hp.iterations
    self.action_list = []
    self.naction_steps = hp.nactions
    self.repeat = hp.repeat

    # num_samples is either a list (first entry used here) or a scalar.
    n_samples = hp.num_samples
    self.M = n_samples[0] if isinstance(n_samples, list) else n_samples

    if hp.selection_frac == -1:
        # only consider K best samples for refitting
        self.K = 10
    else:
        self.K = int(np.ceil(self.M * hp.selection_frac))

    # action dimensions:
    # deltax, delty, goup_nstep, delta_rot, close_nstep
    self.adim = ag_params['adim']
    # state dimension
    self.sdim = ag_params['sdim']

    self.indices = []
    self.mean = self.sigma = self.state = None
    self.dict_ = collections.OrderedDict()

    if 'sawyer' in ag_params:
        for topic in ('gen_image', 'gen_pix_distrib', 'gen_score'):
            publisher = rospy.Publisher(topic, numpy_msg(floatarray),
                                        queue_size=10)
            setattr(self, topic + '_publisher', publisher)

    # planning statistics
    self.plan_stat = {}
    self.warped_image_goal = None
    self.warped_image_start = None

    stochastic = hp.stochastic_planning
    self.smp_peract = stochastic[0] if stochastic else 1

    self.ncam, self.ndesig, self.ncontxt = 1, 1, 0
    self.len_pred = self.repeat * self.naction_steps - self.ncontxt
    self.best_cost_perstep = np.zeros(
        [self.ncam, self.ndesig, self.len_pred])
    self._close_override = False
class CEM_Controller_Base(Policy):
    """
    Cross Entropy Method Stochastic Optimizer

    Repeatedly samples action sequences from a Gaussian, scores them with
    ``get_rollouts`` (implemented by subclasses) and refits the Gaussian to
    the K lowest-scoring samples.
    """

    def __init__(self, ag_params, policyparams):
        """
        :param ag_params: agent parameter mapping; ``'adim'`` and ``'sdim'``
            are read, ``'logging_dir'``/``'gpu_id'`` select the log file and
            the presence of ``'sawyer'`` enables the ROS publishers
        :param policyparams: overrides applied on top of the default
            hyperparameters
        """
        self._hp = self._default_hparams()
        self.override_defaults(policyparams)
        self.agentparams = ag_params

        if 'logging_dir' in self.agentparams:
            self.logger = Logger(
                self.agentparams['logging_dir'],
                'cem{}log.txt'.format(self.agentparams['gpu_id']))
        else:
            self.logger = Logger(printout=True)
        self.logger.log('init CEM controller')

        self.t = None

        # A truthy 'verbose' turns logging on; an int value doubles as the
        # logging frequency.
        if self._hp.verbose:
            self.verbose = True
            if isinstance(self._hp.verbose, int):
                self.verbose_freq = self._hp.verbose
            else:
                self.verbose_freq = 1
        else:
            self.verbose = False
            self.verbose_freq = 1

        self.niter = self._hp.iterations
        self.action_list = []
        self.naction_steps = self._hp.nactions
        self.repeat = self._hp.repeat

        # num_samples is either a list (first entry used here) or a scalar.
        if isinstance(self._hp.num_samples, list):
            self.M = self._hp.num_samples[0]
        else:
            self.M = self._hp.num_samples

        if self._hp.selection_frac != -1:
            self.K = int(np.ceil(self.M * self._hp.selection_frac))
        else:
            self.K = 10  # only consider K best samples for refitting

        # action dimensions:
        # deltax, delty, goup_nstep, delta_rot, close_nstep
        self.adim = self.agentparams['adim']
        self.sdim = self.agentparams['sdim']  # state dimension

        self.indices = []
        self.mean = None
        self.sigma = None
        self.state = None
        self.dict_ = collections.OrderedDict()

        if 'sawyer' in self.agentparams:
            self.gen_image_publisher = rospy.Publisher(
                'gen_image', numpy_msg(floatarray), queue_size=10)
            self.gen_pix_distrib_publisher = rospy.Publisher(
                'gen_pix_distrib', numpy_msg(floatarray), queue_size=10)
            self.gen_score_publisher = rospy.Publisher(
                'gen_score', numpy_msg(floatarray), queue_size=10)

        self.plan_stat = {}  # planning statistics
        self.warped_image_goal, self.warped_image_start = None, None

        if self._hp.stochastic_planning:
            self.smp_peract = self._hp.stochastic_planning[0]
        else:
            self.smp_peract = 1

        self.ncam = 1
        self.ndesig = 1
        self.ncontxt = 0
        self.len_pred = self.repeat * self.naction_steps - self.ncontxt
        self.best_cost_perstep = np.zeros(
            [self.ncam, self.ndesig, self.len_pred])
        self._close_override = False

    def _default_hparams(self):
        """Return the parent's hyperparameters extended with the CEM defaults."""
        default_dict = {
            'verbose': False,
            'verbose_every_itr': False,
            'niter': 3,
            'num_samples': [200],
            # specify which fraction of best samples to use to compute mean
            # and var for next CEM iteration
            'selection_frac': -1.,
            'discrete_ind': None,
            'reuse_mean': False,
            'reuse_cov': False,
            'stochastic_planning': False,
            'rejection_sampling': True,
            'cov_blockdiag': False,
            'smooth_cov': False,
            'iterations': 3,
            'nactions': 5,
            'repeat': 3,
            'action_bound': True,
            # [None] implies default order, otherwise specify how each
            # action dim in order (aka ['x', 'y', ...]
            'action_order': [None],
            'initial_std': 0.05,  # std dev. in xy
            'initial_std_lift': 0.15,  # std dev. for the lift dimension
            'initial_std_rot': np.pi / 18,
            'initial_std_grasp': 2,
            # if autograsp epsilon is not None apply ag_epsilon to gripper
            # dims (last dim if action order not specified)
            'autograsp_epsilon': [None],
            'finalweight': 10,
            'use_first_plan': False,
            'custom_sampler': None,
            'replan_interval': -1,
            'type': None,
            # add one action sample with zero actions, this might prevent
            # random walks in the end
            'add_zero_action': False,
            # reduce standard dev in later timesteps when reusing action
            'reduce_std_dev': 1.,
            # visualizer selects K best if True (random K trajectories
            # otherwise)
            'visualize_best': True,
        }

        parent_params = super(CEM_Controller_Base, self)._default_hparams()
        for name, value in default_dict.items():
            parent_params.add_hparam(name, value)
        return parent_params

    def reset(self):
        """Clear the per-trajectory planning state."""
        self.plan_stat = {}  # planning statistics
        self.indices = []
        self.action_list = []

    def perform_CEM(self):
        """Run ``self.niter`` CEM iterations for the current time step.

        Each iteration samples actions, scores them via ``get_rollouts``,
        keeps the K samples with the lowest score and refits mean and
        covariance to them. Results are left in ``self.bestaction``,
        ``self.bestaction_withrepeat``, ``self.indices`` and
        ``self.plan_stat``.
        """
        self.logger.log('starting cem at t{}...'.format(self.t))
        timings = OrderedDict()
        t = time.time()

        if not self._hp.reuse_cov or self.t < 2:
            self.sigma = construct_initial_sigma(self._hp, self.adim, self.t)
            self.sigma_prev = self.sigma
        else:
            self.sigma = reuse_cov(self.sigma, self.adim, self._hp)

        if not self._hp.reuse_mean or self.t < 2:
            self.mean = np.zeros(self.adim * self.naction_steps)
        else:
            self.mean = reuse_action(self.bestaction, self._hp)

        if (self._hp.reuse_mean or self._hp.reuse_cov) and self.t >= 2:
            # When re-planning from a previous solution the second
            # num_samples entry is used. A scalar or one-element
            # num_samples (both accepted by __init__) keeps the current M
            # instead of raising.
            num_samples = self._hp.num_samples
            if isinstance(num_samples, (list, tuple)) and len(num_samples) > 1:
                self.M = num_samples[1]
            # selection_frac == -1 means "no fraction given": keep the K
            # chosen in __init__ rather than computing a negative K, which
            # made np.zeros below fail.
            if self._hp.selection_frac != -1:
                self.K = int(np.ceil(self.M * self._hp.selection_frac))

        self.bestindices_of_iter = np.zeros((self.niter, self.K))
        self.cost_perstep = np.zeros([
            self.M, self.ncam, self.ndesig,
            self.repeat * self.naction_steps - self.ncontxt
        ])

        self.logger.log('M {}, K{}'.format(self.M, self.K))
        self.logger.log('------------------------------------------------')
        self.logger.log('starting CEM cylce')
        timings['pre_itr'] = time.time() - t

        for itr in range(self.niter):
            itr_times = OrderedDict()
            self.logger.log('------------')
            self.logger.log('iteration: ', itr)
            t_startiter = time.time()

            if self._hp.custom_sampler is None:
                if self._hp.rejection_sampling:
                    actions = self.sample_actions_rej()
                else:
                    actions = self.sample_actions(self.mean, self.sigma,
                                                  self._hp, self.M)

                if self._hp.autograsp_epsilon[0] is not None:
                    assert len(self._hp.autograsp_epsilon) == 2 or len(self._hp.autograsp_epsilon) == 3, \
                        "Should be array of [z_thresh, epsilon] or [z_thresh, epsilon, norm]"
                    if len(self._hp.autograsp_epsilon) == 2:
                        # Append the default third entry (norm) of 1.
                        self._hp.autograsp_epsilon = list(
                            self._hp.autograsp_epsilon) + [1]
                    actions = apply_ag_epsilon(actions, self.state, self._hp,
                                               self._close_override,
                                               self.t < self._hp.repeat)
            else:
                sampler = self._hp.custom_sampler(self.sigma, self.mean,
                                                  self._hp, self.repeat,
                                                  self.adim)
                actions = sampler.sample(self.M, self.state)

            itr_times['action_sampling'] = time.time() - t_startiter
            t_start = time.time()

            scores = self.get_rollouts(actions, itr, itr_times)
            itr_times['vid_pred_total'] = time.time() - t_start
            t = time.time()
            self.logger.log(
                'overall time for evaluating actions {}'.format(time.time() -
                                                                t_start))

            if self._hp.stochastic_planning:
                actions, scores = self.action_preselection(actions, scores)

            # Lower score is better: keep the K smallest.
            self.indices = scores.argsort()[:self.K]
            self.bestindices_of_iter[itr] = self.indices
            self.bestaction_withrepeat = actions[self.indices[0]]

            self.plan_stat['scores_itr{}'.format(itr)] = scores
            self.plan_stat['bestscore_itr{}'.format(itr)] = scores[
                self.indices[0]]
            if hasattr(self, 'best_cost_perstep'):
                self.plan_stat['best_cost_perstep'] = self.best_cost_perstep

            actions_flat = self.post_process_actions(actions)
            self.fit_gaussians(actions_flat)

            self.logger.log('iter {0}, bestscore {1}'.format(
                itr, scores[self.indices[0]]))
            self.logger.log(
                'overall time for iteration {}'.format(time.time() -
                                                       t_startiter))
            itr_times['post_pred'] = time.time() - t
            timings['itr{}'.format(itr)] = itr_times

    def sample_actions(self, mean, sigma, hp, M):
        """Draw ``M`` action sequences from ``N(mean, sigma)``.

        :param mean: flat mean vector
        :param sigma: covariance matrix
        :param hp: hyperparameter object
        :param M: number of sequences to draw
        :return: actions with every step repeated ``hp.repeat`` times along
            axis 1
        """
        # NOTE(review): this reads hp.naction_steps and hp.adim, but the
        # defaults declared in this class only define 'nactions' -- confirm
        # the parent hyperparameters provide these names.
        actions = np.random.multivariate_normal(mean, sigma, M)
        actions = actions.reshape(M, hp.naction_steps, hp.adim)
        if hp.discrete_ind is not None:
            actions = discretize(actions, M, hp.naction_steps,
                                 hp.discrete_ind)
        if hp.action_bound:
            actions = truncate_movement(actions, hp)
        actions = np.repeat(actions, hp.repeat, axis=1)

        if hp.add_zero_action:
            actions[0] = 0
        return actions

    def fit_gaussians(self, actions_flat):
        """Refit ``self.mean``/``self.sigma`` to the samples in ``self.indices``."""
        # only take the K best actions
        arr_best_actions = actions_flat[self.indices]
        self.sigma = np.cov(arr_best_actions, rowvar=False, bias=False)
        if self._hp.cov_blockdiag:
            self.sigma = make_blockdiagonal(self.sigma, self.naction_steps,
                                            self.adim)
        if self._hp.smooth_cov:
            # Average with the covariance of the previous fit.
            self.sigma = 0.5 * self.sigma + 0.5 * self.sigma_prev
            self.sigma_prev = self.sigma
        self.mean = np.mean(arr_best_actions, axis=0)

    def post_process_actions(self, actions):
        """Drop the action repeats, flatten and record ``self.bestaction``.

        :return: array of shape ``(M // smp_peract, naction_steps * adim)``
        """
        num_ex = self.M // self.smp_peract
        actions = actions.reshape(num_ex, self.naction_steps, self.repeat,
                                  self.adim)
        # taking only one of the repeated actions
        actions = actions[:, :, -1, :]
        actions_flat = actions.reshape(num_ex,
                                       self.naction_steps * self.adim)
        self.bestaction = actions[self.indices[0]]
        return actions_flat

    def sample_actions_rej(self):
        """
        Perform rejection sampling: sequences are redrawn until the first
        two action dims stay within 1.5 * initial_std and the third within
        1.5 * initial_std_lift.
        :return: actions with every step repeated ``self.repeat`` times
        """
        runs = []
        actions = []

        if self._hp.stochastic_planning:
            num_distinct_actions = self.M // self.smp_peract
        else:
            num_distinct_actions = self.M

        xy_std = self._hp.initial_std
        lift_std = self._hp.initial_std_lift
        std_fac = 1.5

        for _ in range(num_distinct_actions):
            ok = False
            n_trials = 0
            while not ok:
                n_trials += 1
                action_seq = np.random.multivariate_normal(
                    self.mean, self.sigma, 1)
                action_seq = action_seq.reshape(self.naction_steps,
                                                self.adim)
                out_of_bounds = (
                    np.any(action_seq[:, :2] > xy_std * std_fac)
                    or np.any(action_seq[:, :2] < -xy_std * std_fac)
                    or np.any(action_seq[:, 2] > lift_std * std_fac)
                    or np.any(action_seq[:, 2] < -lift_std * std_fac))
                ok = not out_of_bounds

            runs.append(n_trials)
            actions.append(action_seq)
        actions = np.stack(actions, axis=0)

        if self._hp.stochastic_planning:
            actions = np.repeat(actions, self._hp.stochastic_planning[0], 0)

        self.logger.log('rejection smp max trials', max(runs))
        if self._hp.discrete_ind is not None:
            # NOTE(review): no 'discretize' method is defined in this class;
            # sample_actions calls a module-level discretize() with a
            # different signature -- confirm which one is intended.
            actions = self.discretize(actions)
        actions = np.repeat(actions, self.repeat, axis=1)

        self.logger.log('max action val xy', np.max(actions[:, :, :2]))
        self.logger.log('max action val z', np.max(actions[:, :, 2]))
        return actions

    def action_preselection(self, actions, scores):
        """Collapse the ``smp_peract`` samples per action into one.

        'optimistic' keeps the sample with the maximum score, 'pessimistic'
        the one with the minimum score.

        :raises ValueError: for any other ``stochastic_planning[1]`` value
        :return: tuple ``(actions, scores)`` with one entry per distinct
            action
        """
        num_distinct = self.M // self.smp_peract
        actions = actions.reshape((num_distinct, self.smp_peract,
                                   self.naction_steps, self.repeat,
                                   self.adim))
        scores = scores.reshape((num_distinct, self.smp_peract))

        selection = self._hp.stochastic_planning[1]
        if selection == 'optimistic':
            inds = np.argmax(scores, axis=1)
            scores = np.max(scores, axis=1)
        elif selection == 'pessimistic':
            inds = np.argmin(scores, axis=1)
            scores = np.min(scores, axis=1)
        else:
            raise ValueError(
                'unknown stochastic_planning mode: {}'.format(selection))

        actions = [actions[b, inds[b]] for b in range(num_distinct)]
        return np.stack(actions, 0), scores

    def get_rollouts(self, actions, cem_itr, itr_times):
        """Score the sampled actions; must be implemented by subclasses."""
        raise NotImplementedError

    def act(self, t=None, i_tr=None):
        """
        Return the action to apply at time step t.

        At t == 0 a zero action is returned without planning. Afterwards
        the action comes from the first plan only (``use_first_plan``),
        from a plan refreshed every ``replan_interval`` steps, or from a
        fresh CEM run at every step.

        Args:
            t: the current controller's Time step
            i_tr: trajectory index, stored on the instance
        Returns:
            dict with keys 'actions' and 'plan_stat'
        """
        self.i_tr = i_tr
        self.t = t

        if t == 0:
            action = np.zeros(self.agentparams['adim'])
            self._close_override = False
        else:
            if self._hp.use_first_plan:
                self.logger.log('using actions of first plan, no replanning!!')
                if t == 1:
                    self.perform_CEM()
                action = self.bestaction_withrepeat[t]
            elif self._hp.replan_interval != -1:
                if (t - 1) % self._hp.replan_interval == 0:
                    self.last_replan = t
                    self.perform_CEM()
                self.logger.log('last replan', self.last_replan)
                self.logger.log('taking action of ', t - self.last_replan)
                action = self.bestaction_withrepeat[t - self.last_replan]
            else:
                self.perform_CEM()
                action = self.bestaction[0]

                self.logger.log('########')
                self.logger.log('best action sequence: ')
                for i in range(self.bestaction.shape[0]):
                    self.logger.log("t{}: {}".format(i, self.bestaction[i]))
                self.logger.log('########')

        self.action_list.append(action)
        self.logger.log("applying action {}".format(action))

        # With a 5-dim action space a positive last entry sets the
        # close-override flag that is passed to apply_ag_epsilon next time.
        if self.agentparams['adim'] == 5 and action[-1] > 0:
            self._close_override = True
        else:
            self._close_override = False

        return {'actions': action, 'plan_stat': self.plan_stat}
def sync(node_id, conf, printout=False):
    """Continuously synchronise a data-collector node with the master.

    Every 10 seconds the latest model weights are pulled from the master,
    train/val tfrecords are transferred, and score files and log files are
    pushed to the master. The function never returns.

    :param node_id: used in the log file name and log messages
    :param conf: configuration mapping; ``conf['current_dir']`` and
        ``conf['agent']['logging_dir']`` are read
    :param printout: forwarded to the ``Logger``
    """
    current_dir = conf['current_dir']
    experiment_name = current_dir.split('/')[-1]

    master_datadir = '/raid/ngc2/pushing_data/cartgripper/onpolicy/{}'.format(
        experiment_name)
    master_scoredir = '/raid/ngc2/pushing_data/cartgripper/onpolicy/{}/scores'.format(
        experiment_name)

    # Everything after the first 'onpolicy' in the current directory.
    exp_subpath = current_dir.partition('onpolicy')[2]
    master_base_dir = '/home/ngc2/Documents/visual_mpc/experiments/cem_exp/onpolicy' + exp_subpath
    master_modeldata_dir = master_base_dir + '/modeldata'
    master_logging_dir = master_base_dir + '/logging_datacollectors'

    logging_dir = conf['agent']['logging_dir']
    logger = Logger(logging_dir,
                    'sync_node{}.txt'.format(node_id),
                    printout=printout)
    logger.log('started remote sync process on node{}'.format(node_id))

    # local means "locally" in the container on ngc2
    local_modeldata_dir = '/result/modeldata'
    local_datadir = '/result/data'
    local_scoredir = '/result/data/scores'

    if not os.path.exists(local_modeldata_dir):
        os.makedirs(local_modeldata_dir)

    def _announce_and_run(what, command):
        # Log the step and the exact command line, then run it in a shell.
        logger.log(what)
        logger.log('executing: {}'.format(command))
        os.system(command)

    while True:
        _announce_and_run(
            'get latest weights from master',
            'rsync -rltgoDv --delete-after {}:{} {}'.format(
                master, master_modeldata_dir + '/', local_modeldata_dir))

        for split in ('train', 'val'):
            transfer_tfrecs(local_datadir, master_datadir, logger, split)

        _announce_and_run(
            'transfer scorefiles to master',
            'rsync -a --update {} {}:{}'.format(local_scoredir + '/', master,
                                                master_scoredir))

        _announce_and_run(
            'transfer logfiles to master',
            'rsync -a --update {} {}:{}'.format(logging_dir + '/', master,
                                                master_logging_dir))

        time.sleep(10)
def __init__(self,
             dict_=None,
             append_masks=True,
             filepath=None,
             dict_name=None,
             numex=4,
             suf="",
             col_titles=None,
             renorm_heatmaps=True,
             logger=None):
    """
    :param dict_: dictionary containing image tensors
    :param append_masks: whether to visualize the masks
    :param filepath: the path to save the gif; also the directory the
        dictionary is loaded from when dict_name is given
    :param dict_name: file name of a pickled dictionary inside filepath;
        when given it replaces dict_
    :param numex: how many examples of the batch to visualize
    :param suf: append a suffix to the gif name
    :param col_titles: a list of titles for each column
    :param renorm_heatmaps: stored as self.renormalize_heatmaps
    :param logger: object exposing ``log``; a muted Logger is created when
        None
    :raises ValueError: if 'gen_images' holds fewer than numex examples, or
        if a plain list entry does not hold 4-dimensional arrays

    The dictionary contains keys-values pairs of {"video_name":"image_tensor_list"}
    where "video_name" is used as the caption in the visualization
    where "image_tensor_list" is a list with np.arrays (batchsize, 64,64,n_channel) for each time step.

    If n_channel is 1 a heatmap will be shown. Use renorm_heatmaps=True to normalize the heatmaps
    at every time step (this is necessary when the range of values changes significantly over time).

    If the key contains the string "flow" a color-coded flow field will be shown.

    if the key contains the string "masks" the image_tensor_list needs to be of the following form:
    [mask_list_0, ..., mask_list_Tmax]
    where mask_list_t = [mask_0, ..., mask_N]
    where mask_i.shape = [batch_size, 64,64,1]
    """
    if logger is None:
        self.logger = Logger(mute=True)
    else:
        self.logger = logger

    self.gif_savepath = filepath
    if dict_name is not None:
        # NOTE: unpickling can execute arbitrary code -- only load files
        # from trusted sources. The handle is closed once loading is done.
        with open(filepath + '/' + dict_name, "rb") as f:
            dict_ = pickle.load(f)

    self.dict_ = dict_

    # 'iternum' is metadata, not a video: remove it before the key loop.
    if 'iternum' in dict_:
        self.iternum = dict_['iternum']
        del dict_['iternum']
    else:
        self.iternum = ""

    if 'gen_images' in dict_:
        gen_images = dict_['gen_images']
        if gen_images[0].shape[0] < numex:
            raise ValueError(
                "batchsize too small for providing desired number of exmaples!"
            )

    self.numex = numex
    self.video_list = []
    self.append_masks = False

    for key in list(dict_.keys()):
        data = dict_[key]

        if key == 'ground_truth':  # special treatment for gtruth
            ground_truth = dict_['ground_truth']
            if not isinstance(ground_truth, list):
                # Split a single array along axis 1 into one array per step.
                ground_truth = np.split(ground_truth,
                                        ground_truth.shape[1],
                                        axis=1)
                if ground_truth[0].shape[0] == 1:
                    ground_truth = [
                        g.reshape((1, 64, 64, 3)) for g in ground_truth
                    ]
                else:
                    ground_truth = [np.squeeze(g) for g in ground_truth]
            # The first time step is dropped.
            ground_truth = ground_truth[1:]

            if 'overlay_' + key in dict_:
                overlay_points = dict_['overlay_' + key]
                self.video_list.append(
                    (ground_truth, 'Ground Truth', overlay_points))
            else:
                self.video_list.append((ground_truth, 'Ground Truth'))

        elif 'overlay' in key:
            self.logger.log('visualizing overlay')
            images = data[0]
            gen_distrib = data[1]
            gen_distrib = color_code_distrib(gen_distrib,
                                             self.numex,
                                             renormalize=True)
            if gen_distrib[0].shape != images[0].shape:
                images = resize_image(images, gen_distrib[0].shape[1:3])
            overlay = compute_overlay(images, gen_distrib, self.numex)
            self.video_list.append((overlay, key))

        elif type(data[0]) is list or '_l' in key:  # for lists of videos
            if 'masks' in key and not append_masks:
                self.logger.log('skipping masks!')
                continue
            self.logger.log("the key \"{}\" contains {} videos".format(
                key, len(data[0])))
            self.append_masks = True
            vid_list = convert_to_videolist(data, repeat_last_dim=False)
            for i, m in enumerate(vid_list):
                self.video_list.append((m, '{} {}'.format(key, i)))

        elif 'flow' in key:
            self.logger.log(
                'visualizing key {} with colorflow'.format(key))
            self.video_list.append((visualize_flow(data), key))

        elif 'actions' in key:
            self.visualize_states_actions(dict_['states'], dict_['actions'])

        elif 'gen_distrib' in key:  # if gen_distrib plot psum overtime!
            self.video_list.append((data, key))

        else:
            if isinstance(data, list):
                if len(data[0].shape) == 4:
                    self.video_list.append((data, key))
                else:
                    # Raising a plain string is itself a TypeError in
                    # Python 3; raise a real exception with the message.
                    raise ValueError(
                        "wrong shape in key {} with shape {}".format(
                            key, data[0].shape))
            else:
                self.logger.log('ignoring key ', key)

        if key == 'scores':
            self.video_list.append((self.get_score_images(data), key))

    self.renormalize_heatmaps = renorm_heatmaps
    self.logger.log('renormalizing heatmaps: ', self.renormalize_heatmaps)

    self.t = 0
    self.suf = suf
    self.num_rows = len(self.video_list)
    self.col_titles = col_titles
class Visualizer_tkinter(object): def __init__(self, dict_=None, append_masks=True, filepath=None, dict_name=None, numex=4, suf="", col_titles=None, renorm_heatmaps=True, logger=None): """ :param dict_: dictionary containing image tensors :param append_masks: whether to visualize the masks :param gif_savepath: the path to save the gif :param numex: how many examples of the batch to visualize :param suf: append a suffix to the gif name :param col_titles: a list of titles for each column The dictionary contains keys-values pairs of {"video_name":"image_tensor_list"} where "video_name" is used as the caption in the visualization where "image_tensor_list" is a list with np.arrays (batchsize, 64,64,n_channel) for each time step. If n_channel is 1 a heatmap will be shown. Use renorm_heatmaps=True to normalize the heatmaps at every time step (this is necessary when the range of values changes significantly over time). If the key contains the string "flow" a color-coded flow field will be shown. if the key contains the string "masks" the image_tensor_list needs to be of the following form: [mask_list_0, ..., mask_list_Tmax] where mask_list_t = [mask_0, ..., mask_N] where mask_i.shape = [batch_size, 64,64,1] """ if logger == None: self.logger = Logger(mute=True) else: self.logger = logger self.gif_savepath = filepath if dict_name != None: dict_ = pickle.load(open(filepath + '/' + dict_name, "rb")) self.dict_ = dict_ if 'iternum' in dict_: self.iternum = dict_['iternum'] del dict_['iternum'] else: self.iternum = "" if 'gen_images' in dict_: gen_images = dict_['gen_images'] if gen_images[0].shape[0] < numex: raise ValueError( "batchsize too small for providing desired number of exmaples!" 
) self.numex = numex self.video_list = [] self.append_masks = False for key in list(dict_.keys()): data = dict_[key] if key == 'ground_truth': # special treatement for gtruth ground_truth = dict_['ground_truth'] if not isinstance(ground_truth, list): ground_truth = np.split(ground_truth, ground_truth.shape[1], axis=1) if ground_truth[0].shape[0] == 1: ground_truth = [ g.reshape((1, 64, 64, 3)) for g in ground_truth ] else: ground_truth = [np.squeeze(g) for g in ground_truth] ground_truth = ground_truth[1:] if 'overlay_' + key in dict_: overlay_points = dict_['overlay_' + key] self.video_list.append( (ground_truth, 'Ground Truth', overlay_points)) else: self.video_list.append((ground_truth, 'Ground Truth')) elif 'overlay' in key: self.logger.log('visualizing overlay') images = data[0] gen_distrib = data[1] gen_distrib = color_code_distrib(gen_distrib, self.numex, renormalize=True) if gen_distrib[0].shape != images[0].shape: images = resize_image(images, gen_distrib[0].shape[1:3]) overlay = compute_overlay(images, gen_distrib, self.numex) self.video_list.append((overlay, key)) elif type(data[0]) is list or '_l' in key: # for lists of videos if 'masks' in key and not append_masks: self.logger.log('skipping masks!') continue self.logger.log("the key \"{}\" contains {} videos".format( key, len(data[0]))) self.append_masks = True vid_list = convert_to_videolist(data, repeat_last_dim=False) for i, m in enumerate(vid_list): self.video_list.append((m, '{} {}'.format(key, i))) elif 'flow' in key: self.logger.log( 'visualizing key {} with colorflow'.format(key)) self.video_list.append((visualize_flow(data), key)) elif 'actions' in key: self.visualize_states_actions(dict_['states'], dict_['actions']) elif 'gen_distrib' in key: # if gen_distrib plot psum overtime! 
self.video_list.append((data, key)) else: if isinstance(data, list): if len(data[0].shape) == 4: self.video_list.append((data, key)) else: raise "wrong shape in key {} with shape {}".format( key, data[0].shape) else: self.logger.log('ignoring key ', key) if key == 'scores': self.video_list.append((self.get_score_images(data), key)) self.renormalize_heatmaps = renorm_heatmaps self.logger.log('renormalizing heatmaps: ', self.renormalize_heatmaps) self.t = 0 self.suf = suf self.num_rows = len(self.video_list) self.col_titles = col_titles def get_score_images(self, scores): height = self.video_list[0][0][0].shape[1] width = self.video_list[0][0][0].shape[2] seqlen = len(self.video_list[0][0]) txt_im = [] for i in range(self.numex): txt_im.append( draw_text_image(str(scores[i]), image_size=(height, width))) textrow = np.stack(txt_im, 0) textrow = [textrow for _ in range(seqlen)] return textrow def make_direct_vid(self, separate_vid=False, resize=None): self.logger.log('making gif with tags') new_videolist = [] for vid in self.video_list: print('key', vid[1]) print('len', len(vid[0])) print('sizes', [im.shape for im in vid[0]]) print('####') if 'gen_distrib' in vid[1]: plt.switch_backend('TkAgg') # plt.imshow(vid[0][0][0]) # plt.show() images = vid[0] if resize is not None: images = resize_image(images, size=resize) name = vid[1] if images[0].shape[-1] == 1 or len(images[0].shape) == 3: images = color_code_distrib(images, self.numex, renormalize=True) new_videolist.append((images, name)) framelist = assemble_gif(new_videolist, convert_from_float=True, num_exp=self.numex) # save_video_mp4(self.gif_savepath +'/prediction_at_t{}') npy_to_gif( framelist, self.gif_savepath + '/direct{}{}'.format(self.iternum, self.suf)) def visualize_states_actions(self, states, actions): plt.figure(figsize=(25, 2), dpi=80) for ex in range(self.numex): plt.subplot(1, self.numex, ex + 1) plt.axis('equal') move = actions[ex, :, :2] updown = actions[ex, :, 2] rot = actions[ex, :, 3] open = 
actions[ex, :4] state_xy = states[ex, :, :2] alpha = states[ex, :, 3] action_startpoints = state_xy action_endpoints = state_xy + move plt.plot(state_xy[:, 0], state_xy[:, 1], '-o') plt.ylim([-0.17, 0.17]) plt.xlim([0.46, 0.83]) for t in range(states.shape[1]): x = [action_startpoints[t, 0], action_endpoints[t, 0]] y = [action_startpoints[t, 1], action_endpoints[t, 1]] if t % 2 == 0: plt.plot(x, y, '--r') else: plt.plot(x, y, '--y') # plt.show() plt.savefig(self.gif_savepath + "/actions_vis.png") plt.close('all') def make_image_strip(self, i_ex, tstart=1, tend=13): """ :param i_ex: the index of the example to flatten to the image strip :param tstart: :param tend: :return: """ cols = tend - tstart + 1 width_per_ex = 1. standard_size = np.array([width_per_ex * cols, self.num_rows * 1.0]) ### 1.5 # standard_size = np.array([6, 24]) figsize = (standard_size * 1.0).astype(np.int) fig = plt.figure(num=1, figsize=figsize) outer_grid = gridspec.GridSpec(self.num_rows, 1) drow = 1. / self.num_rows self.im_handle_list = [] axes_list = [] for row in range(self.num_rows): inner_grid = gridspec.GridSpecFromSubplotSpec( 1, cols, subplot_spec=outer_grid[row], wspace=0.0, hspace=0.0) image_row = self.video_list[row][0] im_handle_row = [] col = 0 for t in range(tstart, tend): ax = plt.Subplot(fig, inner_grid[col]) ax.set_xticks([]) ax.set_yticks([]) axes_list.append(fig.add_subplot(ax)) if image_row[0][i_ex].shape[-1] == 1: im_handle = axes_list[-1].imshow(np.squeeze( image_row[t][i_ex]), zorder=0, cmap=plt.get_cmap('jet'), interpolation='none', animated=True) else: im_handle = axes_list[-1].imshow(image_row[t][i_ex], interpolation='none', animated=True) im_handle_row.append(im_handle) col += 1 self.im_handle_list.append(im_handle_row) # plt.figtext(.5, 1 - (row * drow * 0.990) - 0.01, self.video_list[row][1], va="center", ha="center", # size=8) plt.savefig(self.gif_savepath + '/iter{}ex{}_overtime.png'.format(self.iternum, i_ex)) def build_figure(self): self.logger.log('building 
figure...') # plot each markevery case for linear x and y scales root = Tk.Tk() root.rowconfigure(1, weight=1) root.columnconfigure(1, weight=1) frame = Frame(root) frame.grid(column=1, row=1, sticky=tkinter.constants.NSEW) frame.rowconfigure(1, weight=1) frame.columnconfigure(1, weight=1) if self.numex == 1: width_per_ex = 1.5 else: width_per_ex = 0.9 standard_size = np.array( [width_per_ex * self.numex, self.num_rows * 1.0]) ### 1.5 # standard_size = np.array([6, 24]) figsize = (standard_size * 1.0).astype(np.int) fig = plt.figure(num=1, figsize=figsize) self.addScrollingFigure(fig, frame) buttonFrame = Frame(root) buttonFrame.grid(row=1, column=2, sticky=tkinter.constants.NS) biggerButton = Button(buttonFrame, text="larger", command=lambda: self.changeSize(fig, 1.5)) biggerButton.grid(column=1, row=1) smallerButton = Button(buttonFrame, text="smaller", command=lambda: self.changeSize(fig, .5)) smallerButton.grid(column=1, row=2) axes_list = [] l = [] for vid in self.video_list: l.append(len(vid[0])) tlen = np.min(np.array(l)) self.logger.log('minimum video length', tlen) outer_grid = gridspec.GridSpec(self.num_rows, 1) drow = 1. 
/ self.num_rows self.im_handle_list = [] self.plot_handle_list = [] for row in range(self.num_rows): inner_grid = gridspec.GridSpecFromSubplotSpec( 1, self.numex, subplot_spec=outer_grid[row], wspace=0.0, hspace=0.0) image_row = self.video_list[row][0] im_handle_row = [] plot_handle_row = [] for col in range(self.numex): ax = plt.Subplot(fig, inner_grid[col]) ax.set_xticks([]) ax.set_yticks([]) axes_list.append(fig.add_subplot(ax)) if row == 0 and self.col_titles != None: axes_list[-1].set_title(self.col_titles[col]) if image_row[0][col].shape[-1] == 1: im_handle = axes_list[-1].imshow( np.squeeze(image_row[0][col]), # first timestep zorder=0, cmap=plt.get_cmap('jet'), interpolation='none', animated=True) else: im_handle = axes_list[-1].imshow(image_row[0][col], interpolation='none', animated=True) if len(self.video_list[row]) == 3: #overlay with markers: coords = self.video_list[row][2][t][col] plothandle = axes_list[-1].scatter(coords[1], coords[0], marker="d", s=70, edgecolors='r', facecolors="None") axes_list[-1].set_xlim(0, 63) axes_list[-1].set_ylim(63, 0) plot_handle_row.append(plothandle) else: plot_handle_row.append(None) im_handle_row.append(im_handle) self.im_handle_list.append(im_handle_row) self.plot_handle_list.append(plot_handle_row) plt.figtext(.5, 1 - (row * drow * 1.) - 0.001, self.video_list[row][1], va="center", ha="center", size=8) plt.axis('off') fig.tight_layout() # Set up formatting for the movie files Writer = animation.writers['imagemagick_file'] writer = Writer(fps=15, metadata=dict(artist='Me'), bitrate=1800) # call the animator. blit=True means only re-draw the parts that have changed. 
anim = animation.FuncAnimation(fig, self.animate, fargs=[ self.im_handle_list, self.plot_handle_list, self.video_list, self.numex, self.num_rows, tlen ], frames=tlen, interval=200, blit=True) if self.append_masks: self.suf = '_masks' + self.suf if self.gif_savepath != None: filepath = self.gif_savepath + '/animation{}{}.gif'.format( self.iternum, self.suf) # filepath = self.gif_savepath + '/animation{}{}.mp4'.format(self.iternum,self.suf) self.logger.log('saving gif under: ', filepath) anim.save(filepath, writer='imagemagick') root.mainloop() def changeSize(self, figure, factor): global canvas, mplCanvas, interior, interior_id, frame, cwid oldSize = figure.get_size_inches() self.logger.log(("old size is", oldSize)) figure.set_size_inches([factor * s for s in oldSize]) wi, hi = [i * figure.dpi for i in figure.get_size_inches()] self.logger.log(("new size is", figure.get_size_inches())) self.logger.log(("new size pixels: ", wi, hi)) mplCanvas.config(width=wi, height=hi) printBboxes("A") # mplCanvas.grid(sticky=Tkconstants.NSEW) canvas.itemconfigure(cwid, width=wi, height=hi) printBboxes("B") canvas.config(scrollregion=canvas.bbox(tkinter.constants.ALL), width=200, height=200) figure.canvas.draw() printBboxes("C") self.logger.log() def addScrollingFigure(self, figure, frame): global canvas, mplCanvas, interior, interior_id, cwid # set up a canvas with scrollbars canvas = Canvas(frame) canvas.grid(row=1, column=1, sticky=tkinter.constants.NSEW) xScrollbar = Scrollbar(frame, orient=tkinter.constants.HORIZONTAL) yScrollbar = Scrollbar(frame) xScrollbar.grid(row=2, column=1, sticky=tkinter.constants.EW) yScrollbar.grid(row=1, column=2, sticky=tkinter.constants.NS) canvas.config(xscrollcommand=xScrollbar.set) xScrollbar.config(command=canvas.xview) canvas.config(yscrollcommand=yScrollbar.set) yScrollbar.config(command=canvas.yview) # plug in the figure figAgg = FigureCanvasTkAgg(figure, canvas) mplCanvas = figAgg.get_tk_widget() # mplCanvas.grid(sticky=Tkconstants.NSEW) # 
and connect figure with scrolling region cwid = canvas.create_window(0, 0, window=mplCanvas, anchor=tkinter.constants.NW) printBboxes("Init") canvas.config(scrollregion=canvas.bbox(tkinter.constants.ALL), width=200, height=200) def animate(self, *args): global t _, im_handle_list, plot_handle_list, video_list, num_ex, num_rows, tlen = args artistlist = [] for row in range(num_rows): image_row = video_list[row][0] #0 stands for images for col in range(num_ex): if image_row[0][col].shape[ -1] == 1: # if visualizing with single-channel heatmap im = np.squeeze(image_row[t][col]) if self.renormalize_heatmaps: im = im / (np.max(im) + 1e-5) im_handle_list[row][col].set_array(im) else: im_handle_list[row][col].set_array(image_row[t][col]) if len(video_list[row]) == 3: overlay_row = video_list[row][2] #2 stands for overlay plot_handle_list[row][col].set_array( overlay_row[t][col]) #2 stands for overlay # print "set array to", overlay_row[t][col] artistlist.append(plot_handle_list[row][col]) artistlist += im_handle_list[row] # print 'update at t', t t += 1 if t == tlen: t = 0 return artistlist
class ReplayBuffer(object):
    """Bounded FIFO store of trajectories that is refilled by remote collectors.

    Trajectories are appended with :meth:`push_back`; once more than
    ``maxsize`` are held, the oldest one is dropped.  :meth:`get_batch`
    samples uniformly (with replacement) from the stored trajectories, and
    :meth:`update` pulls finished trajectories from the ray object ids in
    ``todo_ids`` and restarts the collector that produced each one.
    """

    def __init__(self, conf, data_collectors=None, todo_ids=None, printout=False, mode='train'):
        """Set up an empty buffer.

        :param conf: configuration mapping; reads ``logging_dir``,
            ``onpolconf`` (whose ``replay_size[mode]`` is the capacity),
            ``batch_size`` and, optionally, ``agent``.
        :param data_collectors: container indexed by ``collector_id`` whose
            items expose ``run_traj.remote()``.
        :param todo_ids: outstanding ray object ids handed to ``ray.wait``.
        :param printout: forwarded to the ``Logger``.
        :param mode: key used to select the capacity from ``replay_size``.
        """
        self.logger = Logger(conf['logging_dir'], 'replay_log.txt', printout=printout)
        self.conf = conf
        self.onpolconf = conf['onpolconf']
        # Always define the attribute: update() reads it, and leaving it unset
        # when 'agent' is missing used to surface as an AttributeError there.
        self.agentparams = conf['agent'] if 'agent' in conf else None
        self.ring_buffer = []
        self.mode = mode
        self.maxsize = self.onpolconf['replay_size'][mode]
        self.batch_size = conf['batch_size']
        self.data_collectors = data_collectors
        self.todo_ids = todo_ids
        self.scores = []
        self.num_updates = 0
        self.logger.log('init Replay buffer')
        self.tstart = time.time()

    def push_back(self, traj):
        """Append ``traj`` and evict the oldest entry if capacity is exceeded.

        :param traj: object whose ``images`` attribute is a float32 array
            with values no larger than 1.0 (checked by the assert below).
        """
        assert traj.images.dtype == np.float32 and np.max(traj.images) <= 1.0
        self.ring_buffer.append(traj)
        if len(self.ring_buffer) > self.maxsize:
            self.ring_buffer.pop(0)
        if len(self.ring_buffer) % 100 == 0:
            self.logger.log('current size {}'.format(len(self.ring_buffer)))

    def get_batch(self):
        """Sample ``batch_size`` trajectories uniformly with replacement.

        :return: tuple ``(images, states, actions)``; each is the sampled
            trajectories' ``images`` / ``X_Xdot_full`` / ``actions`` arrays
            stacked along a new leading axis.
        :raises ValueError: if the buffer is empty.
        """
        current_size = len(self.ring_buffer)
        if current_size == 0:
            # randint(0, -1) would raise ValueError as well, but with a
            # message that says nothing about the buffer being empty.
            raise ValueError('cannot sample a batch from an empty replay buffer')

        sampled = [
            self.ring_buffer[random.randrange(current_size)]
            for _ in range(self.batch_size)
        ]
        images = np.stack([traj.images for traj in sampled], 0)
        states = np.stack([traj.X_Xdot_full for traj in sampled], 0)
        actions = np.stack([traj.actions for traj in sampled], 0)
        return images, states, actions

    def update(self, sess):
        """Collect finished trajectories without blocking and restart collectors.

        ``ray.wait`` is called with ``timeout=0``; every object id it reports
        as done is fetched, its trajectory is stored, its ``final_poscost``
        is recorded, and the originating collector is launched again.  Every
        100th stored trajectory the scores are plotted.

        :param sess: unused here; kept so the call signature stays stable.
        """
        done_ids, self.todo_ids = ray.wait(self.todo_ids, timeout=0)
        if not done_ids:
            return

        self.logger.log("len doneid {}".format(len(done_ids)))
        for done in done_ids:
            traj, info = ray.get(done)
            collector_id = info['collector_id']
            self.logger.log("received trajectory from {}, pushing back traj".format(collector_id))
            self.push_back(traj)
            self.scores.append(traj.final_poscost)

            # Relaunch the collector so it keeps producing trajectories.
            returning_collector = self.data_collectors[collector_id]
            self.todo_ids.append(returning_collector.run_traj.remote())
            self.logger.log('restarting {}'.format(collector_id))

            self.num_updates += 1
            if self.num_updates % 100 == 0:
                if self.agentparams is not None:
                    plot_scores(self.scores, self.agentparams['result_dir'])
                else:
                    self.logger.log('no agent params in conf, skipping score plot')

            elapsed = time.time() - self.tstart
            if elapsed > 0:
                # Guarded: a coarse clock can report zero elapsed time.
                self.logger.log('traj_per hour: {}'.format(self.num_updates / (elapsed / 3600)))
            self.logger.log('avg time per traj {}s'.format(elapsed / self.num_updates))
def trainvid_online(train_replay_buffer, val_replay_buffer, conf, logging_dir, gpu_id, printout=False):
    """Train the video-prediction model online from replay buffers.

    Steps, in order:
      1. pin the process to ``gpu_id`` via ``CUDA_VISIBLE_DEVICES``;
      2. override ``conf['output_dir']`` / ``conf['pretrained_model']`` from
         the ``RESULT_DIR`` / ``TEN_DATA`` environment variables when set;
      3. build a dedicated graph and session, preload the training buffer,
         construct the model and restore the pretrained weights;
      4. run ``conf['num_iterations']`` training steps, periodically
         refreshing the buffers, writing validation/video/train summaries
         and saving checkpoints (only the most recent one is kept).

    :param train_replay_buffer: buffer providing ``preload``, ``update`` and
        ``get_batch`` for training batches.
    :param val_replay_buffer: buffer providing ``update`` and ``get_batch``
        for validation batches.
    :param conf: configuration dict; mutated in place (``output_dir``,
        ``event_log_dir``, ``pretrained_model``).
    :param logging_dir: directory passed to the ``Logger``.
    :param gpu_id: value written to ``CUDA_VISIBLE_DEVICES``.
    :param printout: forwarded to the ``Logger``.
    :return: list with the wall-clock duration in seconds of every training
        step.
    :raises ValueError: if the model is an ``Alex_Interface_Model`` and no
        file matches ``conf['pretrained_model'] + '*'``.
    """
    logger = Logger(logging_dir, 'trainvid_online_log.txt', printout=printout)
    logger.log('starting trainvid online')

    os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_id)
    logger.log('training video prediction using cuda_visible_devices=', os.environ["CUDA_VISIBLE_DEVICES"])
    from tensorflow.python.client import device_lib
    logger.log(device_lib.list_local_devices())

    if 'RESULT_DIR' in os.environ:
        conf['output_dir'] = os.environ['RESULT_DIR'] + '/modeldata'
    conf['event_log_dir'] = conf['output_dir']

    if 'TEN_DATA' in os.environ:
        # Re-root the pretrained model path: keep what follows
        # 'tensorflow_data' and prefix it with $TEN_DATA.
        tenpath = conf['pretrained_model'].partition('tensorflow_data')[2]
        conf['pretrained_model'] = os.environ['TEN_DATA'] + tenpath

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)
    g_vidpred = tf.Graph()
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                            allow_soft_placement=True),
                      graph=g_vidpred)

    with sess.as_default():
        with g_vidpred.as_default():
            tf.train.start_queue_runners(sess)
            sess.run(tf.global_variables_initializer())

            train_replay_buffer.preload(conf)

            Model = conf['pred_model']
            is_alex_model = Model == Alex_Interface_Model
            model = Model(conf, load_data=False, build_loss=True)

            logger.log('Constructing saver.')
            global_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
            global_vars = filter_vars(global_vars)
            saving_saver = tf.train.Saver(global_vars, max_to_keep=0)
            summary_writer = tf.summary.FileWriter(conf['event_log_dir'],
                                                   graph=sess.graph,
                                                   flush_secs=10)

            if is_alex_model:
                # Glob returns a (possibly empty) list, never None, so test
                # for emptiness to actually detect a missing checkpoint.
                if not gfile.Glob(conf['pretrained_model'] + '*'):
                    raise ValueError("Model file {} not found!".format(
                        conf['pretrained_model']))
                model.m.restore(sess, conf['pretrained_model'])
            else:
                restore_vars = variable_checkpoint_matcher(
                    conf, global_vars, conf['pretrained_model'])
                loading_saver = tf.train.Saver(restore_vars, max_to_keep=0)
                load_checkpoint(conf, sess, loading_saver, conf['pretrained_model'])

            logger.log(
                '-------------------------------------------------------------------'
            )
            logger.log('verify current settings!! ')
            for key in list(conf.keys()):
                logger.log(key, ': ', conf[key])
            logger.log(
                '-------------------------------------------------------------------'
            )

            tf.logging.set_verbosity(tf.logging.INFO)

            def build_feed(step, images, states, actions):
                """Map one batch onto the model's placeholders."""
                return {
                    model.iter_num: np.float32(step),
                    model.images_pl: images,
                    model.actions_pl: actions,
                    model.states_pl: states,
                }

            starttime = time.time()
            t_iter = []
            for itr in range(0, conf['num_iterations'], 1):
                # Pull newly collected trajectories every 10th step.
                if itr % 10 == 0:
                    tstart_rb_update = time.time()
                    train_replay_buffer.update(sess)
                    if itr % 100 == 0:
                        logger.log(
                            "took {} to update the replay buffer".format(
                                time.time() - tstart_rb_update))

                t_startiter = time.time()
                images, states, actions = train_replay_buffer.get_batch()
                feed_dict = build_feed(itr, images, states, actions)
                if is_alex_model:
                    cost, _, summary_str = sess.run([
                        model.m.g_loss, model.m.train_op, model.m.train_summ_op
                    ], feed_dict)
                else:
                    cost, _, summary_str = sess.run(
                        [model.loss, model.train_op, model.train_summ_op],
                        feed_dict)
                t_iter.append(time.time() - t_startiter)

                if itr % 10 == 0:
                    logger.log('cost ' + str(itr) + ' ' + str(cost))

                if itr % VAL_INTERVAL == 0:
                    val_replay_buffer.update(sess)
                    # NOTE: rebinding images/states/actions here means a video
                    # summary written in the same step uses the validation batch.
                    images, states, actions = val_replay_buffer.get_batch()
                    feed_dict = build_feed(itr, images, states, actions)
                    if is_alex_model:
                        [summary_str] = sess.run([model.m.val_summ_op], feed_dict)
                    else:
                        [summary_str] = sess.run([model.val_summ_op], feed_dict)
                    summary_writer.add_summary(summary_str, itr)

                if itr % VIDEO_INTERVAL == 0:
                    feed_dict = build_feed(itr, images, states, actions)
                    video_proto = sess.run(model.train_video_summaries,
                                           feed_dict=feed_dict)
                    summary_writer.add_summary(
                        convert_tensor_to_gif_summary(video_proto), itr)

                save_interval = conf['onpolconf']['save_interval']
                if itr % save_interval == 0 and itr != 0:
                    # Keep only the newest checkpoint: drop the files of the
                    # previous save before writing the new one.
                    oldmodelname = conf['output_dir'] + '/model' + str(
                        itr - save_interval)
                    stale_files = gfile.Glob(oldmodelname + '*')
                    if stale_files != [] and (itr - save_interval) > 0:
                        print('deleting {}*'.format(oldmodelname))
                        # Delete through the OS API instead of a string-built
                        # shell command; stay best-effort like `rm` was.
                        for stale_path in stale_files:
                            try:
                                os.remove(stale_path)
                            except OSError as err:
                                logger.log('could not delete {}: {}'.format(
                                    stale_path, err))
                    logger.log('Saving model to' + conf['output_dir'])
                    newmodelname = conf['output_dir'] + '/model' + str(itr)
                    saving_saver.save(sess, newmodelname)

                if itr % 50 == 1:
                    hours = (time.time() - starttime) / 3600
                    logger.log('running for {}h'.format(hours))
                    avg_t_iter = np.mean(t_iter[-100:])
                    logger.log(
                        'average time per iteration: {0}s'.format(avg_t_iter))
                    logger.log('expected for complete training: {0}h '.format(
                        avg_t_iter / 3600 * conf['num_iterations']))

                if itr % SUMMARY_INTERVAL == 2:
                    summary_writer.add_summary(summary_str, itr)

            return t_iter