Example #1
class Sim(object):
    """ Main class to run algorithms and experiments. """
    def __init__(self, config, gpu_id=0, ngpu=1, logger=None):
        self._hyperparams = config
        self.agent = config['agent']['type'](config['agent'])
        self.agentparams = config['agent']
        self.policyparams = config['policy']
        if logger is None:
            self.logger = Logger(printout=True)
        else:
            self.logger = logger
        self.logger.log('started sim')
        self.agentparams['gpu_id'] = gpu_id

        self.policy = config['policy']['type'](self.agent._hyperparams,
                                               config['policy'], gpu_id, ngpu)

        self._record_queue = config.pop('record_saver', None)
        self._counter = config.pop('counter', None)

        self.trajectory_list = []
        self.im_score_list = []
        try:
            os.remove(self._hyperparams['agent']['image_dir'])
        except OSError:  # the image dir may not exist yet
            pass
        self.task_mode = 'train'

    def run(self):
        if self._counter is None:
            for i in range(self._hyperparams['start_index'],
                           self._hyperparams['end_index'] + 1):
                self.take_sample(i)
        else:
            itr = self._counter.ret_increment()
            while itr < self._hyperparams['ntraj']:
                print('taking sample {} of {}'.format(
                    itr, self._hyperparams['ntraj']))
                self.take_sample(itr)
                itr = self._counter.ret_increment()

    def take_sample(self, sample_index):
        self.policy.reset()
        agent_data, obs_dict, policy_out = self.agent.sample(
            self.policy, sample_index)
        if self._hyperparams.get('save_data', True):
            self.save_data(sample_index, agent_data, obs_dict, policy_out)
        return agent_data

    def save_data(self, itr, agent_data, obs_dict, policy_outputs):
        if self._hyperparams.get('save_only_good',
                                 False) and not agent_data['goal_reached']:
            return

        if self._hyperparams.get('save_raw_images', False):
            self._save_raw_data(itr, agent_data, obs_dict, policy_outputs)
        elif self._record_queue is not None:
            self._record_queue.put((agent_data, obs_dict, policy_outputs))
        else:
            raise ValueError('Saving neither raw data nor records')

    def _save_raw_data(self, itr, agent_data, obs_dict, policy_outputs):
        if 'RESULT_DIR' in os.environ:
            data_save_dir = os.environ['RESULT_DIR'] + '/data'
        else:
            data_save_dir = self.agentparams['data_save_dir']

        ngroup = self._hyperparams.get('ngroup', 1000)
        igrp = itr // ngroup
        group_folder = data_save_dir + '/{}/traj_group{}'.format(
            self.task_mode, igrp)
        if not os.path.exists(group_folder):
            os.makedirs(group_folder)

        traj_folder = group_folder + '/traj{}'.format(itr)
        if os.path.exists(traj_folder):
            print('trajectory folder {} already exists, deleting the folder'.
                  format(traj_folder))
            shutil.rmtree(traj_folder)

        os.makedirs(traj_folder)
        print('writing: ', traj_folder)
        if 'images' in obs_dict:
            images = obs_dict.pop('images')
            T, n_cams = images.shape[:2]
            for i in range(n_cams):
                os.mkdir(traj_folder + '/images{}'.format(i))
            for t in range(T):
                for i in range(n_cams):
                    cv2.imwrite(
                        '{}/images{}/im_{}.png'.format(traj_folder, i, t),
                        images[t, i, :, :, ::-1])
        with open('{}/agent_data.pkl'.format(traj_folder), 'wb') as file:
            pkl.dump(agent_data, file)
        with open('{}/obs_dict.pkl'.format(traj_folder), 'wb') as file:
            pkl.dump(obs_dict, file)
        with open('{}/policy_out.pkl'.format(traj_folder), 'wb') as file:
            pkl.dump(policy_outputs, file)
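
For orientation, a minimal sketch of how Sim is driven by its config dictionary. AgentMuJoCo and CEM_Controller_Vidpred are hypothetical placeholders for whatever classes the experiment config actually names, and the paths are illustrative.

# Hypothetical usage sketch; real experiment configs supply the actual types.
config = {
    'agent': {
        'type': AgentMuJoCo,              # instantiated as type(config['agent'])
        'data_save_dir': '/tmp/traj_data',
        'image_dir': '/tmp/images',
        'adim': 5,                        # action dimension
        'sdim': 10,                       # state dimension
    },
    'policy': {
        'type': CEM_Controller_Vidpred,   # instantiated with (agent hparams, policy conf, gpu_id, ngpu)
    },
    'start_index': 0,
    'end_index': 99,
    'save_data': True,
    'save_raw_images': True,
}
sim = Sim(config, gpu_id=0, ngpu=1)
sim.run()  # samples trajectories start_index..end_index and saves each one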

Example #2
class CEM_Controller_Base(Policy):
    """
    Cross Entropy Method Stochastic Optimizer
    """
    def __init__(self, ag_params, policyparams):
        """
        :param ag_params:
        :param policyparams:
        :param predictor:
        :param save_subdir:
        :param gdnet: goal-distance network
        """
        self._hp = self._default_hparams()
        self.override_defaults(policyparams)

        self.agentparams = ag_params
        if 'logging_dir' in self.agentparams:
            self.logger = Logger(
                self.agentparams['logging_dir'],
                'cem{}log.txt'.format(self.agentparams['gpu_id']))
        else:
            self.logger = Logger(printout=True)
        self.logger.log('init CEM controller')

        self.t = None

        if self._hp.verbose:
            self.verbose = True
            if isinstance(self._hp.verbose, int):
                self.verbose_freq = self._hp.verbose
            else:
                self.verbose_freq = 1
        else:
            self.verbose = False
            self.verbose_freq = 1

        self.niter = self._hp.iterations

        self.action_list = []
        self.naction_steps = self._hp.nactions
        self.repeat = self._hp.repeat

        if isinstance(self._hp.num_samples, list):
            self.M = self._hp.num_samples[0]
        else:
            self.M = self._hp.num_samples

        if self._hp.selection_frac != -1:
            self.K = int(np.ceil(self.M * self._hp.selection_frac))
        else:
            self.K = 10  # only consider K best samples for refitting

        # action dimensions:
        # deltax, deltay, goup_nstep, delta_rot, close_nstep
        self.adim = self.agentparams['adim']
        self.sdim = self.agentparams['sdim']  # state dimension

        self.indices = []
        self.mean = None
        self.sigma = None
        self.state = None

        self.dict_ = collections.OrderedDict()

        if 'sawyer' in self.agentparams:
            self.gen_image_publisher = rospy.Publisher('gen_image',
                                                       numpy_msg(floatarray),
                                                       queue_size=10)
            self.gen_pix_distrib_publisher = rospy.Publisher(
                'gen_pix_distrib', numpy_msg(floatarray), queue_size=10)
            self.gen_score_publisher = rospy.Publisher('gen_score',
                                                       numpy_msg(floatarray),
                                                       queue_size=10)

        self.plan_stat = {}  #planning statistics

        self.warped_image_goal, self.warped_image_start = None, None

        if self._hp.stochastic_planning:
            self.smp_peract = self._hp.stochastic_planning[0]
        else:
            self.smp_peract = 1

        self.ncam = 1
        self.ndesig = 1
        self.ncontxt = 0
        self.len_pred = self.repeat * self.naction_steps - self.ncontxt
        self.best_cost_perstep = np.zeros(
            [self.ncam, self.ndesig, self.len_pred])
        self._close_override = False

    def _default_hparams(self):
        default_dict = {
            'verbose': False,
            'verbose_every_itr': False,
            'niter': 3,
            'num_samples': [200],
            'selection_frac': -1.,  # fraction of best samples used to refit mean and covariance in the next CEM iteration
            'discrete_ind': None,
            'reuse_mean': False,
            'reuse_cov': False,
            'stochastic_planning': False,
            'rejection_sampling': True,
            'cov_blockdiag': False,
            'smooth_cov': False,
            'iterations': 3,
            'nactions': 5,
            'repeat': 3,
            'action_bound': True,
            'action_order': [None],  # [None] implies the default order; otherwise list each action dim in order (e.g. ['x', 'y', ...])
            'initial_std': 0.05,  # std dev. in xy
            'initial_std_lift': 0.15,  # std dev. in z (lift)
            'initial_std_rot': np.pi / 18,
            'initial_std_grasp': 2,
            'autograsp_epsilon': [None],  # if not None, apply ag_epsilon to the gripper dims (last dim unless action_order is specified)
            'finalweight': 10,
            'use_first_plan': False,
            'custom_sampler': None,
            'replan_interval': -1,
            'type': None,
            'add_zero_action': False,  # add one all-zero action sample; this can prevent random walks at the end
            'reduce_std_dev': 1.,  # reduce the standard dev. in later timesteps when reusing actions
            'visualize_best': True,  # visualizer selects the K best trajectories if True (random K otherwise)
        }

        parent_params = super(CEM_Controller_Base, self)._default_hparams()
        for k in default_dict.keys():
            parent_params.add_hparam(k, default_dict[k])
        return parent_params

    def reset(self):
        self.plan_stat = {}  #planning statistics
        self.indices = []
        self.action_list = []

    def perform_CEM(self):
        self.logger.log('starting cem at t{}...'.format(self.t))
        timings = OrderedDict()
        t = time.time()
        if not self._hp.reuse_cov or self.t < 2:
            self.sigma = construct_initial_sigma(self._hp, self.adim, self.t)
            self.sigma_prev = self.sigma
        else:
            self.sigma = reuse_cov(self.sigma, self.adim, self._hp)

        if not self._hp.reuse_mean or self.t < 2:
            self.mean = np.zeros(self.adim * self.naction_steps)
        else:
            self.mean = reuse_action(self.bestaction, self._hp)

        if (self._hp.reuse_mean or self._hp.reuse_cov) and self.t >= 2:
            self.M = self._hp.num_samples[1]
            self.K = int(np.ceil(self.M * self._hp.selection_frac))

        self.bestindices_of_iter = np.zeros((self.niter, self.K))
        self.cost_perstep = np.zeros([
            self.M, self.ncam, self.ndesig,
            self.repeat * self.naction_steps - self.ncontxt
        ])

        self.logger.log('M {}, K {}'.format(self.M, self.K))
        self.logger.log('------------------------------------------------')
        self.logger.log('starting CEM cycle')
        timings['pre_itr'] = time.time() - t
        for itr in range(self.niter):
            itr_times = OrderedDict()
            self.logger.log('------------')
            self.logger.log('iteration: ', itr)
            t_startiter = time.time()
            if self._hp.custom_sampler is None:
                if self._hp.rejection_sampling:
                    actions = self.sample_actions_rej()
                else:
                    actions = self.sample_actions(self.mean, self.sigma,
                                                  self._hp, self.M)

                if self._hp.autograsp_epsilon[0] is not None:
                    assert len(self._hp.autograsp_epsilon) == 2 or len(self._hp.autograsp_epsilon) == 3, \
                        "Should be array of [z_thresh, epsilon] or [z_thresh, epsilon, norm]"
                    if len(self._hp.autograsp_epsilon) == 2:
                        self._hp.autograsp_epsilon = [
                            i for i in self._hp.autograsp_epsilon
                        ] + [1]

                    actions = apply_ag_epsilon(actions, self.state, self._hp,
                                               self._close_override,
                                               self.t < self._hp.repeat)
            else:
                sampler = self._hp.custom_sampler(self.sigma, self.mean,
                                                  self._hp, self.repeat,
                                                  self.adim)
                actions = sampler.sample(self.M, self.state)

            itr_times['action_sampling'] = time.time() - t_startiter
            t_start = time.time()

            scores = self.get_rollouts(actions, itr, itr_times)
            itr_times['vid_pred_total'] = time.time() - t_start
            t = time.time()
            self.logger.log(
                'overall time for evaluating actions {}'.format(time.time() -
                                                                t_start))

            if self._hp.stochastic_planning:
                actions, scores = self.action_preselection(actions, scores)

            self.indices = scores.argsort()[:self.K]
            self.bestindices_of_iter[itr] = self.indices

            self.bestaction_withrepeat = actions[self.indices[0]]
            self.plan_stat['scores_itr{}'.format(itr)] = scores
            self.plan_stat['bestscore_itr{}'.format(itr)] = scores[
                self.indices[0]]
            if hasattr(self, 'best_cost_perstep'):
                self.plan_stat['best_cost_perstep'] = self.best_cost_perstep

            actions_flat = self.post_process_actions(actions)

            self.fit_gaussians(actions_flat)

            self.logger.log('iter {0}, bestscore {1}'.format(
                itr, scores[self.indices[0]]))
            self.logger.log(
                'overall time for iteration {}'.format(time.time() -
                                                       t_startiter))
            itr_times['post_pred'] = time.time() - t
            timings['itr{}'.format(itr)] = itr_times

        # pkl.dump(timings, open('{}/timings_CEM_{}.pkl'.format(self.agentparams['record'], self.t), 'wb'))

    def sample_actions(self, mean, sigma, hp, M):
        actions = np.random.multivariate_normal(mean, sigma, M)
        actions = actions.reshape(M, hp.naction_steps, hp.adim)
        if hp.discrete_ind is not None:
            actions = discretize(actions, M, hp.naction_steps, hp.discrete_ind)

        if hp.action_bound:
            actions = truncate_movement(actions, hp)
        actions = np.repeat(actions, hp.repeat, axis=1)

        if hp.add_zero_action:
            actions[0] = 0

        return actions

    def fit_gaussians(self, actions_flat):
        arr_best_actions = actions_flat[
            self.indices]  # only take the K best actions
        self.sigma = np.cov(arr_best_actions, rowvar=False, bias=False)
        if self._hp.cov_blockdiag:
            self.sigma = make_blockdiagonal(self.sigma, self.naction_steps,
                                            self.adim)
        if self._hp.smooth_cov:
            self.sigma = 0.5 * self.sigma + 0.5 * self.sigma_prev
            self.sigma_prev = self.sigma
        self.mean = np.mean(arr_best_actions, axis=0)

    def post_process_actions(self, actions):
        num_ex = self.M // self.smp_peract
        actions = actions.reshape(num_ex, self.naction_steps, self.repeat,
                                  self.adim)
        actions = actions[:, :,
                          -1, :]  # taking only one of the repeated actions
        actions_flat = actions.reshape(num_ex, self.naction_steps * self.adim)
        self.bestaction = actions[self.indices[0]]
        return actions_flat

    def sample_actions_rej(self):
        """
        Perform rejection sampling
        :return:
        """
        runs = []
        actions = []

        if self._hp.stochastic_planning:
            num_distinct_actions = self.M // self.smp_peract
        else:
            num_distinct_actions = self.M

        for _ in range(num_distinct_actions):
            ok = False
            num_tries = 0
            while not ok:
                num_tries += 1
                action_seq = np.random.multivariate_normal(
                    self.mean, self.sigma, 1)

                action_seq = action_seq.reshape(self.naction_steps, self.adim)
                xy_std = self._hp.initial_std
                lift_std = self._hp.initial_std_lift

                std_fac = 1.5
                if np.any(action_seq[:, :2] > xy_std*std_fac) or \
                        np.any(action_seq[:, :2] < -xy_std*std_fac) or \
                        np.any(action_seq[:, 2] > lift_std*std_fac) or \
                        np.any(action_seq[:, 2] < -lift_std*std_fac):
                    ok = False
                else:
                    ok = True

            runs.append(num_tries)
            actions.append(action_seq)
        actions = np.stack(actions, axis=0)

        if self._hp.stochastic_planning:
            actions = np.repeat(actions, self._hp.stochastic_planning[0], 0)

        self.logger.log('rejection smp max trials', max(runs))
        if self._hp.discrete_ind is not None:
            actions = self.discretize(actions)
        actions = np.repeat(actions, self.repeat, axis=1)

        self.logger.log('max action val xy', np.max(actions[:, :, :2]))
        self.logger.log('max action val z', np.max(actions[:, :, 2]))
        return actions

    def action_preselection(self, actions, scores):
        actions = actions.reshape((self.M // self.smp_peract, self.smp_peract,
                                   self.naction_steps, self.repeat, self.adim))
        scores = scores.reshape((self.M // self.smp_peract, self.smp_peract))
        if self._hp.stochastic_planning[1] == 'optimistic':
            inds = np.argmax(scores, axis=1)
            scores = np.max(scores, axis=1)
        elif self._hp.stochastic_planning[1] == 'pessimistic':
            inds = np.argmin(scores, axis=1)
            scores = np.min(scores, axis=1)
        else:
            raise ValueError('unknown stochastic_planning mode {}'.format(
                self._hp.stochastic_planning[1]))

        actions = [
            actions[b, inds[b]] for b in range(self.M // self.smp_peract)
        ]
        return np.stack(actions, 0), scores

    def get_rollouts(self, actions, cem_itr, itr_times):
        raise NotImplementedError

    def act(self, t=None, i_tr=None):
        """
        Return a random action for a state.
        Args:
                if performing highres tracking images is highres image
            t: the current controller's Time step
            goal_pix: in coordinates of small image
            desig_pix: in coordinates of small image
        """
        self.i_tr = i_tr
        self.t = t

        if t == 0:
            action = np.zeros(self.agentparams['adim'])
            self._close_override = False
        else:
            if self._hp.use_first_plan:
                self.logger.log('using actions of first plan, no replanning!!')
                if t == 1:
                    self.perform_CEM()
                action = self.bestaction_withrepeat[t]
            elif self._hp.replan_interval != -1:
                if (t - 1) % self._hp.replan_interval == 0:
                    self.last_replan = t
                    self.perform_CEM()
                self.logger.log('last replan', self.last_replan)
                self.logger.log('taking action of ', t - self.last_replan)
                action = self.bestaction_withrepeat[t - self.last_replan]
            else:
                self.perform_CEM()
                action = self.bestaction[0]
                self.logger.log('########')
                self.logger.log('best action sequence: ')
                for i in range(self.bestaction.shape[0]):
                    self.logger.log("t{}: {}".format(i, self.bestaction[i]))
                self.logger.log('########')

        self.action_list.append(action)

        self.logger.log("applying action  {}".format(action))

        if self.agentparams['adim'] == 5 and action[-1] > 0:
            self._close_override = True
        else:
            self._close_override = False

        return {'actions': action, 'plan_stat': self.plan_stat}
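
get_rollouts is the hook a concrete controller must fill in. Below is a minimal sketch of a subclass, assuming a user-supplied cost function where lower scores are better (perform_CEM keeps the K lowest via scores.argsort()[:self.K]); cost_fn is a hypothetical stand-in for, e.g., a video-prediction rollout cost.

import numpy as np

class CEM_Controller_Toy(CEM_Controller_Base):
    """Sketch: score each sampled action sequence with a scalar cost."""
    def __init__(self, ag_params, policyparams, cost_fn):
        super(CEM_Controller_Toy, self).__init__(ag_params, policyparams)
        self._cost_fn = cost_fn  # maps an (naction_steps * repeat, adim) array to a scalar

    def get_rollouts(self, actions, cem_itr, itr_times):
        # actions: (M, naction_steps * repeat, adim); return one score per sample
        return np.array([self._cost_fn(a) for a in actions])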

Example #3
def sync(node_id, conf, printout=False):
    experiment_name = conf['current_dir'].split('/')[-1]

    master_datadir = '/raid/ngc2/pushing_data/cartgripper/onpolicy/{}'.format(
        experiment_name)
    master_scoredir = '/raid/ngc2/pushing_data/cartgripper/onpolicy/{}/scores'.format(
        experiment_name)

    exp_subpath = conf['current_dir'].partition('onpolicy')[2]

    master_base_dir = '/home/ngc2/Documents/visual_mpc/experiments/cem_exp/onpolicy' + exp_subpath
    master_modeldata_dir = master_base_dir + '/modeldata'
    master_logging_dir = master_base_dir + '/logging_datacollectors'

    logging_dir = conf['agent']['logging_dir']
    logger = Logger(logging_dir,
                    'sync_node{}.txt'.format(node_id),
                    printout=printout)
    logger.log('started remote sync process on node{}'.format(node_id))

    # local means "locally" in the container on ngc2 ("master" below is presumably a module-level host name)
    local_modeldata_dir = '/result/modeldata'
    local_datadir = '/result/data'
    local_scoredir = '/result/data/scores'

    if not os.path.exists(local_modeldata_dir):
        os.makedirs(local_modeldata_dir)

    while True:
        logger.log('get latest weights from master')
        cmd = 'rsync -rltgoDv --delete-after {}:{} {}'.format(
            master, master_modeldata_dir + '/', local_modeldata_dir)
        logger.log('executing: {}'.format(cmd))
        os.system(cmd)

        transfer_tfrecs(local_datadir, master_datadir, logger, 'train')
        transfer_tfrecs(local_datadir, master_datadir, logger, 'val')

        logger.log('transfer scorefiles to master')
        cmd = 'rsync -a --update {} {}:{}'.format(local_scoredir + '/', master,
                                                  master_scoredir)
        logger.log('executing: {}'.format(cmd))
        os.system(cmd)

        logger.log('transfer logfiles to master')
        cmd = 'rsync -a --update {} {}:{}'.format(logging_dir + '/', master,
                                                  master_logging_dir)
        logger.log('executing: {}'.format(cmd))
        os.system(cmd)

        time.sleep(10)
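
transfer_tfrecs is imported from elsewhere in the repository; the following is only a plausible sketch, assuming it pushes the freshly written tfrecords of one split to the master with the same rsync conventions used above (and the same module-level master host name).

def transfer_tfrecs(local_datadir, master_datadir, logger, mode):
    # sketch only: push the train/ or val/ tfrecord folder to the master host
    cmd = 'rsync -a --update {}/{}/ {}:{}/{}'.format(
        local_datadir, mode, master, master_datadir, mode)
    logger.log('executing: {}'.format(cmd))
    os.system(cmd)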

Example #4
class Visualizer_tkinter(object):
    def __init__(self,
                 dict_=None,
                 append_masks=True,
                 filepath=None,
                 dict_name=None,
                 numex=4,
                 suf="",
                 col_titles=None,
                 renorm_heatmaps=True,
                 logger=None):
        """
        :param dict_: dictionary containing image tensors
        :param append_masks: whether to visualize the masks
        :param gif_savepath: the path to save the gif
        :param numex: how many examples of the batch to visualize
        :param suf: append a suffix to the gif name
        :param col_titles: a list of titles for each column

        The dictionary contains keys-values pairs of {"video_name":"image_tensor_list"}
        where "video_name" is used as the caption in the visualization
        where "image_tensor_list" is a list with np.arrays (batchsize, 64,64,n_channel) for each time step.

        If n_channel is 1 a heatmap will be shown. Use renorm_heatmaps=True to normalize the heatmaps
        at every time step (this is necessary when the range of values changes significantly over time).

        If the key contains the string "flow" a color-coded flow field will be shown.

        if the key contains the string "masks" the image_tensor_list needs to be of the following form:
        [mask_list_0, ..., mask_list_Tmax]
        where mask_list_t = [mask_0, ..., mask_N]
        where mask_i.shape = [batch_size, 64,64,1]
        """
        if logger is None:
            self.logger = Logger(mute=True)
        else:
            self.logger = logger

        self.gif_savepath = filepath
        if dict_name is not None:
            dict_ = pickle.load(open(filepath + '/' + dict_name, "rb"))

        self.dict_ = dict_

        if 'iternum' in dict_:
            self.iternum = dict_['iternum']
            del dict_['iternum']
        else:
            self.iternum = ""

        if 'gen_images' in dict_:
            gen_images = dict_['gen_images']
            if gen_images[0].shape[0] < numex:
                raise ValueError(
                    "batch size too small for providing the desired number of examples!"
                )

        self.numex = numex
        self.video_list = []
        self.append_masks = False

        for key in list(dict_.keys()):
            data = dict_[key]

            if key == 'ground_truth':  # special treatment for ground truth
                ground_truth = dict_['ground_truth']
                if not isinstance(ground_truth, list):
                    ground_truth = np.split(ground_truth,
                                            ground_truth.shape[1],
                                            axis=1)
                    if ground_truth[0].shape[0] == 1:
                        ground_truth = [
                            g.reshape((1, 64, 64, 3)) for g in ground_truth
                        ]
                    else:
                        ground_truth = [np.squeeze(g) for g in ground_truth]
                ground_truth = ground_truth[1:]

                if 'overlay_' + key in dict_:
                    overlay_points = dict_['overlay_' + key]
                    self.video_list.append(
                        (ground_truth, 'Ground Truth', overlay_points))
                else:
                    self.video_list.append((ground_truth, 'Ground Truth'))

            elif 'overlay' in key:
                self.logger.log('visualizing overlay')
                images = data[0]
                gen_distrib = data[1]
                gen_distrib = color_code_distrib(gen_distrib,
                                                 self.numex,
                                                 renormalize=True)
                if gen_distrib[0].shape != images[0].shape:
                    images = resize_image(images, gen_distrib[0].shape[1:3])
                overlay = compute_overlay(images, gen_distrib, self.numex)
                self.video_list.append((overlay, key))

            elif isinstance(data[0], list) or '_l' in key:  # for lists of videos
                if 'masks' in key and not append_masks:
                    self.logger.log('skipping masks!')
                    continue
                self.logger.log("the key \"{}\" contains {} videos".format(
                    key, len(data[0])))
                self.append_masks = True
                vid_list = convert_to_videolist(data, repeat_last_dim=False)

                for i, m in enumerate(vid_list):
                    self.video_list.append((m, '{} {}'.format(key, i)))

            elif 'flow' in key:
                self.logger.log(
                    'visualizing key {} with colorflow'.format(key))
                self.video_list.append((visualize_flow(data), key))

            elif 'actions' in key:
                self.visualize_states_actions(dict_['states'],
                                              dict_['actions'])

            elif 'gen_distrib' in key:  # if gen_distrib plot psum overtime!
                self.video_list.append((data, key))
            else:
                if isinstance(data, list):
                    if len(data[0].shape) == 4:
                        self.video_list.append((data, key))
                    else:
                        raise "wrong shape in key {} with shape {}".format(
                            key, data[0].shape)
                else:
                    self.logger.log('ignoring key ', key)

            if key == 'scores':
                self.video_list.append((self.get_score_images(data), key))

        self.renormalize_heatmaps = renorm_heatmaps
        self.logger.log('renormalizing heatmaps: ', self.renormalize_heatmaps)

        self.t = 0

        self.suf = suf
        self.num_rows = len(self.video_list)

        self.col_titles = col_titles

    def get_score_images(self, scores):
        height = self.video_list[0][0][0].shape[1]
        width = self.video_list[0][0][0].shape[2]
        seqlen = len(self.video_list[0][0])

        txt_im = []
        for i in range(self.numex):
            txt_im.append(
                draw_text_image(str(scores[i]), image_size=(height, width)))
        textrow = np.stack(txt_im, 0)

        textrow = [textrow for _ in range(seqlen)]
        return textrow

    def make_direct_vid(self, separate_vid=False, resize=None):
        self.logger.log('making gif with tags')

        new_videolist = []
        for vid in self.video_list:
            print('key', vid[1])
            print('len', len(vid[0]))
            print('sizes', [im.shape for im in vid[0]])
            print('####')
            if 'gen_distrib' in vid[1]:
                plt.switch_backend('TkAgg')
                # plt.imshow(vid[0][0][0])
                # plt.show()

            images = vid[0]
            if resize is not None:
                images = resize_image(images, size=resize)
            name = vid[1]

            if images[0].shape[-1] == 1 or len(images[0].shape) == 3:
                images = color_code_distrib(images,
                                            self.numex,
                                            renormalize=True)

            new_videolist.append((images, name))

        framelist = assemble_gif(new_videolist,
                                 convert_from_float=True,
                                 num_exp=self.numex)
        # save_video_mp4(self.gif_savepath +'/prediction_at_t{}')
        npy_to_gif(
            framelist,
            self.gif_savepath + '/direct{}{}'.format(self.iternum, self.suf))

    def visualize_states_actions(self, states, actions):

        plt.figure(figsize=(25, 2), dpi=80)

        for ex in range(self.numex):
            plt.subplot(1, self.numex, ex + 1)
            plt.axis('equal')

            move = actions[ex, :, :2]
            updown = actions[ex, :, 2]
            rot = actions[ex, :, 3]
            gripper = actions[ex, :, 4]

            state_xy = states[ex, :, :2]
            alpha = states[ex, :, 3]

            action_startpoints = state_xy
            action_endpoints = state_xy + move

            plt.plot(state_xy[:, 0], state_xy[:, 1], '-o')
            plt.ylim([-0.17, 0.17])
            plt.xlim([0.46, 0.83])

            for t in range(states.shape[1]):

                x = [action_startpoints[t, 0], action_endpoints[t, 0]]
                y = [action_startpoints[t, 1], action_endpoints[t, 1]]
                if t % 2 == 0:
                    plt.plot(x, y, '--r')
                else:
                    plt.plot(x, y, '--y')

        # plt.show()
        plt.savefig(self.gif_savepath + "/actions_vis.png")
        plt.close('all')

    def make_image_strip(self, i_ex, tstart=1, tend=13):
        """
        :param i_ex:  the index of the example to flatten to the image strip
        :param tstart:
        :param tend:
        :return:
        """

        cols = tend - tstart + 1

        width_per_ex = 1.

        standard_size = np.array([width_per_ex * cols,
                                  self.num_rows * 1.0])  ### 1.5
        # standard_size = np.array([6, 24])
        figsize = (standard_size * 1.0).astype(int)
        fig = plt.figure(num=1, figsize=figsize)

        outer_grid = gridspec.GridSpec(self.num_rows, 1)
        drow = 1. / self.num_rows

        self.im_handle_list = []

        axes_list = []

        for row in range(self.num_rows):
            inner_grid = gridspec.GridSpecFromSubplotSpec(
                1, cols, subplot_spec=outer_grid[row], wspace=0.0, hspace=0.0)
            image_row = self.video_list[row][0]

            im_handle_row = []
            col = 0
            for t in range(tstart, tend):

                ax = plt.Subplot(fig, inner_grid[col])
                ax.set_xticks([])
                ax.set_yticks([])
                axes_list.append(fig.add_subplot(ax))

                if image_row[0][i_ex].shape[-1] == 1:

                    im_handle = axes_list[-1].imshow(np.squeeze(
                        image_row[t][i_ex]),
                                                     zorder=0,
                                                     cmap=plt.get_cmap('jet'),
                                                     interpolation='none',
                                                     animated=True)
                else:
                    im_handle = axes_list[-1].imshow(image_row[t][i_ex],
                                                     interpolation='none',
                                                     animated=True)

                im_handle_row.append(im_handle)

                col += 1
            self.im_handle_list.append(im_handle_row)

            # plt.figtext(.5, 1 - (row * drow * 0.990) - 0.01, self.video_list[row][1], va="center", ha="center",
            #             size=8)

        plt.savefig(self.gif_savepath +
                    '/iter{}ex{}_overtime.png'.format(self.iternum, i_ex))

    def build_figure(self):
        self.logger.log('building figure...')

        # plot each markevery case for linear x and y scales
        root = Tk.Tk()
        root.rowconfigure(1, weight=1)
        root.columnconfigure(1, weight=1)

        frame = Frame(root)
        frame.grid(column=1, row=1, sticky=tkinter.constants.NSEW)
        frame.rowconfigure(1, weight=1)
        frame.columnconfigure(1, weight=1)

        if self.numex == 1:
            width_per_ex = 1.5
        else:
            width_per_ex = 0.9
        standard_size = np.array(
            [width_per_ex * self.numex, self.num_rows * 1.0])  ### 1.5
        # standard_size = np.array([6, 24])
        figsize = (standard_size * 1.0).astype(int)
        fig = plt.figure(num=1, figsize=figsize)

        self.addScrollingFigure(fig, frame)

        buttonFrame = Frame(root)
        buttonFrame.grid(row=1, column=2, sticky=tkinter.constants.NS)
        biggerButton = Button(buttonFrame,
                              text="larger",
                              command=lambda: self.changeSize(fig, 1.5))
        biggerButton.grid(column=1, row=1)
        smallerButton = Button(buttonFrame,
                               text="smaller",
                               command=lambda: self.changeSize(fig, .5))
        smallerButton.grid(column=1, row=2)

        axes_list = []
        lengths = []

        for vid in self.video_list:
            lengths.append(len(vid[0]))
        tlen = np.min(np.array(lengths))
        self.logger.log('minimum video length', tlen)

        outer_grid = gridspec.GridSpec(self.num_rows, 1)

        drow = 1. / self.num_rows

        self.im_handle_list = []
        self.plot_handle_list = []
        for row in range(self.num_rows):
            inner_grid = gridspec.GridSpecFromSubplotSpec(
                1,
                self.numex,
                subplot_spec=outer_grid[row],
                wspace=0.0,
                hspace=0.0)
            image_row = self.video_list[row][0]

            im_handle_row = []
            plot_handle_row = []
            for col in range(self.numex):
                ax = plt.Subplot(fig, inner_grid[col])
                ax.set_xticks([])
                ax.set_yticks([])
                axes_list.append(fig.add_subplot(ax))
                if row == 0 and self.col_titles is not None:
                    axes_list[-1].set_title(self.col_titles[col])

                if image_row[0][col].shape[-1] == 1:

                    im_handle = axes_list[-1].imshow(
                        np.squeeze(image_row[0][col]),  # first timestep
                        zorder=0,
                        cmap=plt.get_cmap('jet'),
                        interpolation='none',
                        animated=True)
                else:
                    im_handle = axes_list[-1].imshow(image_row[0][col],
                                                     interpolation='none',
                                                     animated=True)

                if len(self.video_list[row]) == 3:
                    # overlay with markers (first timestep; animate() updates later ones):
                    coords = self.video_list[row][2][0][col]
                    plothandle = axes_list[-1].scatter(coords[1],
                                                       coords[0],
                                                       marker="d",
                                                       s=70,
                                                       edgecolors='r',
                                                       facecolors="None")
                    axes_list[-1].set_xlim(0, 63)
                    axes_list[-1].set_ylim(63, 0)
                    plot_handle_row.append(plothandle)
                else:
                    plot_handle_row.append(None)

                im_handle_row.append(im_handle)
            self.im_handle_list.append(im_handle_row)
            self.plot_handle_list.append(plot_handle_row)

            plt.figtext(.5,
                        1 - (row * drow * 1.) - 0.001,
                        self.video_list[row][1],
                        va="center",
                        ha="center",
                        size=8)

        plt.axis('off')
        fig.tight_layout()

        # Set up formatting for the movie files
        Writer = animation.writers['imagemagick_file']
        writer = Writer(fps=15, metadata=dict(artist='Me'), bitrate=1800)

        # call the animator.  blit=True means only re-draw the parts that have changed.
        anim = animation.FuncAnimation(fig,
                                       self.animate,
                                       fargs=[
                                           self.im_handle_list,
                                           self.plot_handle_list,
                                           self.video_list, self.numex,
                                           self.num_rows, tlen
                                       ],
                                       frames=tlen,
                                       interval=200,
                                       blit=True)

        if self.append_masks:
            self.suf = '_masks' + self.suf
        if self.gif_savepath is not None:
            filepath = self.gif_savepath + '/animation{}{}.gif'.format(
                self.iternum, self.suf)
            # filepath = self.gif_savepath + '/animation{}{}.mp4'.format(self.iternum,self.suf)
            self.logger.log('saving gif under: ', filepath)
            anim.save(filepath, writer='imagemagick')
        root.mainloop()

    def changeSize(self, figure, factor):
        global canvas, mplCanvas, interior, interior_id, frame, cwid
        oldSize = figure.get_size_inches()
        self.logger.log(("old size is", oldSize))
        figure.set_size_inches([factor * s for s in oldSize])
        wi, hi = [i * figure.dpi for i in figure.get_size_inches()]
        self.logger.log(("new size is", figure.get_size_inches()))
        self.logger.log(("new size pixels: ", wi, hi))
        mplCanvas.config(width=wi, height=hi)
        printBboxes("A")
        # mplCanvas.grid(sticky=Tkconstants.NSEW)
        canvas.itemconfigure(cwid, width=wi, height=hi)
        printBboxes("B")
        canvas.config(scrollregion=canvas.bbox(tkinter.constants.ALL),
                      width=200,
                      height=200)
        figure.canvas.draw()
        printBboxes("C")
        self.logger.log()

    def addScrollingFigure(self, figure, frame):
        global canvas, mplCanvas, interior, interior_id, cwid
        # set up a canvas with scrollbars
        canvas = Canvas(frame)
        canvas.grid(row=1, column=1, sticky=tkinter.constants.NSEW)

        xScrollbar = Scrollbar(frame, orient=tkinter.constants.HORIZONTAL)
        yScrollbar = Scrollbar(frame)

        xScrollbar.grid(row=2, column=1, sticky=tkinter.constants.EW)
        yScrollbar.grid(row=1, column=2, sticky=tkinter.constants.NS)

        canvas.config(xscrollcommand=xScrollbar.set)
        xScrollbar.config(command=canvas.xview)
        canvas.config(yscrollcommand=yScrollbar.set)
        yScrollbar.config(command=canvas.yview)

        # plug in the figure
        figAgg = FigureCanvasTkAgg(figure, canvas)
        mplCanvas = figAgg.get_tk_widget()
        # mplCanvas.grid(sticky=Tkconstants.NSEW)

        # and connect figure with scrolling region
        cwid = canvas.create_window(0,
                                    0,
                                    window=mplCanvas,
                                    anchor=tkinter.constants.NW)
        printBboxes("Init")
        canvas.config(scrollregion=canvas.bbox(tkinter.constants.ALL),
                      width=200,
                      height=200)

    def animate(self, *args):
        # FuncAnimation passes the current frame index first, followed by fargs
        t, im_handle_list, plot_handle_list, video_list, num_ex, num_rows, tlen = args

        artistlist = []
        for row in range(num_rows):
            image_row = video_list[row][0]  #0 stands for images

            for col in range(num_ex):

                if image_row[0][col].shape[-1] == 1:  # visualizing with single-channel heatmap
                    im = np.squeeze(image_row[t][col])
                    if self.renormalize_heatmaps:
                        im = im / (np.max(im) + 1e-5)
                    im_handle_list[row][col].set_array(im)
                else:
                    im_handle_list[row][col].set_array(image_row[t][col])

                if len(video_list[row]) == 3:
                    overlay_row = video_list[row][2]  #2 stands for overlay
                    plot_handle_list[row][col].set_array(
                        overlay_row[t][col])  #2 stands for overlay
                    # print "set array to", overlay_row[t][col]
                    artistlist.append(plot_handle_list[row][col])

            artistlist += im_handle_list[row]

        return artistlist
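
A minimal sketch of driving the visualizer with an in-memory dictionary in the format the constructor docstring describes; the tensors are random placeholders and /tmp/vis is an arbitrary output directory.

import numpy as np

T, bsize = 10, 4
dict_ = {
    # 10 timesteps of 64x64 RGB frames for 4 examples, plus a
    # single-channel distribution that is rendered as a heatmap
    'ground_truth': [np.random.rand(bsize, 64, 64, 3) for _ in range(T)],
    'gen_images': [np.random.rand(bsize, 64, 64, 3) for _ in range(T)],
    'gen_distrib': [np.random.rand(bsize, 64, 64, 1) for _ in range(T)],
}
viz = Visualizer_tkinter(dict_, filepath='/tmp/vis', numex=bsize)
viz.make_direct_vid()  # writes the gif to /tmp/vis/direct<iternum><suf>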

Example #5
class ReplayBuffer(object):
    def __init__(self, conf, data_collectors=None, todo_ids=None, printout=False, mode='train'):
        self.logger = Logger(conf['logging_dir'], 'replay_log.txt', printout=printout)
        self.conf = conf
        self.onpolconf = conf['onpolconf']
        if 'agent' in conf:
            self.agentparams = conf['agent']
        self.ring_buffer = []
        self.mode = mode
        self.maxsize = self.onpolconf['replay_size'][mode]
        self.batch_size = conf['batch_size']
        self.data_collectors = data_collectors
        self.todo_ids = todo_ids
        self.scores = []
        self.num_updates = 0
        self.logger.log('init Replay buffer')
        self.tstart = time.time()

    def push_back(self, traj):
        assert traj.images.dtype == np.float32 and np.max(traj.images) <= 1.0
        self.ring_buffer.append(traj)
        if len(self.ring_buffer) > self.maxsize:
            self.ring_buffer.pop(0)
        if len(self.ring_buffer) % 100 == 0:
            self.logger.log('current size {}'.format(len(self.ring_buffer)))

    def get_batch(self):
        images = []
        states = []
        actions = []
        current_size = len(self.ring_buffer)
        for b in range(self.batch_size):
            i = random.randint(0, current_size-1)
            traj = self.ring_buffer[i]
            images.append(traj.images)
            states.append(traj.X_Xdot_full)
            actions.append(traj.actions)
        return np.stack(images,0), np.stack(states,0), np.stack(actions,0)

    def update(self, sess):
        done_id, self.todo_ids = ray.wait(self.todo_ids, timeout=0)
        if len(done_id) != 0:
            self.logger.log("len doneid {}".format(len(done_id)))
            for done in done_id:
                traj, info = ray.get(done)
                self.logger.log("received trajectory from {}, pushing back traj".format(info['collector_id']))
                self.push_back(traj)
                self.scores.append(traj.final_poscost)
                # relaunch the collector if it hasn't finished all its work yet.
                returning_collector = self.data_collectors[info['collector_id']]
                self.todo_ids.append(returning_collector.run_traj.remote())
                self.logger.log('restarting {}'.format(info['collector_id']))

                self.num_updates += 1

                if self.num_updates % 100 == 0:
                    plot_scores(self.scores, self.agentparams['result_dir'])

                self.logger.log('traj_per hour: {}'.format(self.num_updates/((time.time() - self.tstart)/3600)))
                self.logger.log('avg time per traj {}s'.format((time.time() - self.tstart)/self.num_updates))
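
A sketch of the buffer in isolation, using a stand-in trajectory type that carries the attributes push_back and get_batch read (images, X_Xdot_full, actions, final_poscost); the conf values are illustrative.

import collections
import numpy as np

Traj = collections.namedtuple('Traj', ['images', 'X_Xdot_full', 'actions', 'final_poscost'])

conf = {
    'logging_dir': '/tmp/replay_log',
    'onpolconf': {'replay_size': {'train': 1000, 'val': 100}},
    'batch_size': 16,
}
buf = ReplayBuffer(conf, mode='train')
for _ in range(32):
    buf.push_back(Traj(
        images=np.random.rand(15, 64, 64, 3).astype(np.float32),  # must be float32 in [0, 1]
        X_Xdot_full=np.random.rand(15, 10).astype(np.float32),
        actions=np.random.rand(15, 5).astype(np.float32),
        final_poscost=0.0))
images, states, actions = buf.get_batch()  # each stacked along a new batch axis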

Example #6
def trainvid_online(train_replay_buffer,
                    val_replay_buffer,
                    conf,
                    logging_dir,
                    gpu_id,
                    printout=False):
    logger = Logger(logging_dir, 'trainvid_online_log.txt', printout=printout)
    logger.log('starting trainvid online')

    os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_id)
    logger.log('training video prediction using cuda_visible_devices=',
               os.environ["CUDA_VISIBLE_DEVICES"])
    from tensorflow.python.client import device_lib
    logger.log(device_lib.list_local_devices())

    if 'RESULT_DIR' in os.environ:
        conf['output_dir'] = os.environ['RESULT_DIR'] + '/modeldata'
    conf['event_log_dir'] = conf['output_dir']

    if 'TEN_DATA' in os.environ:
        tenpath = conf['pretrained_model'].partition('tensorflow_data')[2]
        conf['pretrained_model'] = os.environ['TEN_DATA'] + tenpath

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)
    g_vidpred = tf.Graph()
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                            allow_soft_placement=True),
                      graph=g_vidpred)
    with sess.as_default():
        with g_vidpred.as_default():
            tf.train.start_queue_runners(sess)
            sess.run(tf.global_variables_initializer())

            train_replay_buffer.preload(conf)

            Model = conf['pred_model']
            model = Model(conf, load_data=False, build_loss=True)
            logger.log('Constructing saver.')
            var_list = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
            var_list = filter_vars(var_list)
            saving_saver = tf.train.Saver(var_list, max_to_keep=0)
            summary_writer = tf.summary.FileWriter(conf['event_log_dir'],
                                                   graph=sess.graph,
                                                   flush_secs=10)

            if conf['pred_model'] == Alex_Interface_Model:
                if not gfile.Glob(conf['pretrained_model'] + '*'):
                    raise ValueError("Model file {} not found!".format(
                        conf['pretrained_model']))
                model.m.restore(sess, conf['pretrained_model'])
            else:
                var_list = variable_checkpoint_matcher(conf, var_list,
                                                       conf['pretrained_model'])
                loading_saver = tf.train.Saver(var_list, max_to_keep=0)
                load_checkpoint(conf, sess, loading_saver,
                                conf['pretrained_model'])

            logger.log(
                '-------------------------------------------------------------------'
            )
            logger.log('verify current settings!! ')
            for key in list(conf.keys()):
                logger.log(key, ': ', conf[key])
            logger.log(
                '-------------------------------------------------------------------'
            )

            tf.logging.set_verbosity(tf.logging.INFO)

            starttime = time.time()
            t_iter = []
            for itr in range(conf['num_iterations']):

                if itr % 10 == 0:
                    tstart_rb_update = time.time()
                    train_replay_buffer.update(sess)
                    if itr % 100 == 0:
                        logger.log(
                            "took {} to update the replay buffer".format(
                                time.time() - tstart_rb_update))

                t_startiter = time.time()
                images, states, actions = train_replay_buffer.get_batch()
                feed_dict = {
                    model.iter_num: np.float32(itr),
                    model.images_pl: images,
                    model.actions_pl: actions,
                    model.states_pl: states
                }
                if conf['pred_model'] == Alex_Interface_Model:
                    cost, _, summary_str = sess.run([
                        model.m.g_loss, model.m.train_op, model.m.train_summ_op
                    ], feed_dict)
                else:
                    cost, _, summary_str = sess.run(
                        [model.loss, model.train_op, model.train_summ_op],
                        feed_dict)
                t_iter.append(time.time() - t_startiter)

                if itr % 10 == 0:
                    logger.log('cost ' + str(itr) + ' ' + str(cost))

                if itr % VAL_INTERVAL == 0:
                    val_replay_buffer.update(sess)
                    images, states, actions = val_replay_buffer.get_batch()
                    feed_dict = {
                        model.iter_num: np.float32(itr),
                        model.images_pl: images,
                        model.actions_pl: actions,
                        model.states_pl: states
                    }
                    if conf['pred_model'] == Alex_Interface_Model:
                        [summary_str] = sess.run([model.m.val_summ_op],
                                                 feed_dict)
                    else:
                        [summary_str] = sess.run([model.val_summ_op],
                                                 feed_dict)
                    summary_writer.add_summary(summary_str, itr)

                if itr % VIDEO_INTERVAL == 0:
                    feed_dict = {
                        model.iter_num: np.float32(itr),
                        model.images_pl: images,
                        model.actions_pl: actions,
                        model.states_pl: states
                    }
                    video_proto = sess.run(model.train_video_summaries,
                                           feed_dict=feed_dict)
                    summary_writer.add_summary(
                        convert_tensor_to_gif_summary(video_proto), itr)

                save_interval = conf['onpolconf']['save_interval']
                if itr % save_interval == 0 and itr != 0:
                    oldmodelname = conf['output_dir'] + '/model' + str(
                        itr - save_interval)
                    if gfile.Glob(oldmodelname + '*') and (itr - save_interval) > 0:
                        print('deleting {}*'.format(oldmodelname))
                        os.system("rm {}*".format(oldmodelname))
                    logger.log('Saving model to ' + conf['output_dir'])
                    newmodelname = conf['output_dir'] + '/model' + str(itr)
                    saving_saver.save(sess, newmodelname)

                if itr % 50 == 1:
                    hours = (time.time() - starttime) / 3600
                    logger.log('running for {}h'.format(hours))
                    avg_t_iter = np.mean(t_iter[-100:])
                    logger.log(
                        'average time per iteration: {0}s'.format(avg_t_iter))
                    logger.log('expected for complete training: {0}h '.format(
                        avg_t_iter / 3600 * conf['num_iterations']))

                if itr % SUMMARY_INTERVAL == 2:
                    summary_writer.add_summary(summary_str, itr)
            return t_iter
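
For reference, a sketch of the configuration trainvid_online consumes; the model class and checkpoint path are hypothetical placeholders, and VAL_INTERVAL, VIDEO_INTERVAL and SUMMARY_INTERVAL are module-level constants defined elsewhere in the file.

# Hypothetical config sketch; real experiments define these entries.
conf = {
    'pred_model': Alex_Interface_Model,   # or another video-prediction model class
    'pretrained_model': '/results/tensorflow_data/model-10000',
    'output_dir': '/tmp/modeldata',
    'num_iterations': 50000,
    'batch_size': 16,
    'onpolconf': {
        'save_interval': 1000,
        'replay_size': {'train': 1000, 'val': 100},
    },
    'logging_dir': '/tmp/logs',
}
t_iter = trainvid_online(train_replay_buffer, val_replay_buffer, conf,
                         conf['logging_dir'], gpu_id=0)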