Example #1
    def __init__(self, env_fns, spaces=None, context='spawn'):
        """
        If you don't specify observation_space, we'll have to create a dummy
        environment to get it.
        """
        ctx = mp.get_context(context)
        if spaces:
            observation_space, action_space = spaces
        else:
            logger.log('Creating dummy env object to get spaces')
            with logger.scoped_configure(format_strs=[]):
                dummy = env_fns[0]()
                observation_space, action_space = dummy.observation_space, dummy.action_space
                dummy.close()
                del dummy
        VecEnv.__init__(self, len(env_fns), observation_space, action_space)
        self.obs_keys, self.obs_shapes, self.obs_dtypes = obs_space_info(
            observation_space)
        self.obs_bufs = [{
            k: ctx.Array(_NP_TO_CT[self.obs_dtypes[k].type],
                         int(np.prod(self.obs_shapes[k])))
            for k in self.obs_keys
        } for _ in env_fns]
        self.parent_pipes = []
        self.procs = []
        with clear_mpi_env_vars():
            for env_fn, obs_buf in zip(env_fns, self.obs_bufs):
                wrapped_fn = CloudpickleWrapper(env_fn)
                parent_pipe, child_pipe = ctx.Pipe()
                proc = ctx.Process(target=_subproc_worker,
                                   args=(child_pipe, parent_pipe, wrapped_fn,
                                         obs_buf, self.obs_shapes,
                                         self.obs_dtypes, self.obs_keys))
                proc.daemon = True
                self.procs.append(proc)
                self.parent_pipes.append(parent_pipe)
                proc.start()
                child_pipe.close()
        self.waiting_step = False
        self.viewer = None
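The constructor above allocates one shared ctypes array per environment and per observation key (obs_bufs) so worker processes can write observations directly into shared memory instead of sending them over the pipe. Below is a minimal, self-contained sketch of that mechanism (the names _worker, buf and shape are illustrative and not from the original code): a multiprocessing Array is wrapped as a NumPy view on both sides of a spawned process.

import multiprocessing as mp
import numpy as np

def _worker(buf, shape):
    # wrap the shared ctypes buffer as a NumPy view and write a fake observation into it
    view = np.frombuffer(buf.get_obj(), dtype=np.float32).reshape(shape)
    view[:] = np.arange(np.prod(shape), dtype=np.float32).reshape(shape)

if __name__ == '__main__':
    ctx = mp.get_context('spawn')
    shape = (2, 3)
    buf = ctx.Array('f', int(np.prod(shape)))  # analogous to one entry of self.obs_bufs
    proc = ctx.Process(target=_worker, args=(buf, shape))
    proc.start()
    proc.join()
    # the parent reads the observation from the same shared memory, no pickling involved
    print(np.frombuffer(buf.get_obj(), dtype=np.float32).reshape(shape))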
Example #2
def split_data(paths, n):
    episode_size = paths.get_current_episode_size()
    logger.log("Collected episode size is ", episode_size)
    index = np.arange(episode_size)
    np.random.shuffle(index)
    train_index = index[:int(0.8 * episode_size)]
    test_index = index[int(0.8 * episode_size):]
    train_dict, test_dict = dict([]), dict([])
    for key in paths.buffers:
        data = paths.buffers[key]
        train_data, test_data = data[train_index], data[test_index]
        train_dict[key] = train_data
        test_dict[key] = test_data
    train_lst, test_lst = [], []
    for i in range(n):
        train, test = dict([]), dict([])
        for key in paths.buffers:
            train_data, test_data = train_dict[key], test_dict[key]
            train[key] = train_data[i * 100:(i + 1) * 100]  # i-th chunk of 100 samples
            test[key] = test_data[i * 100:(i + 1) * 100]
        train_lst.append(train)
        test_lst.append(test)

    return train_lst, test_lst
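split_data shuffles indices once, takes an 80/20 train/test split per buffer key, and then carves both splits into n chunks of 100 samples. The following self-contained sketch (synthetic data, not from the original code) shows the same shuffle/split/chunk pattern on a plain array.

import numpy as np

data = np.arange(1000).reshape(500, 2)                  # 500 samples, 2 features
index = np.random.permutation(len(data))                # shuffle indices once
train_idx = index[:int(0.8 * len(data))]
test_idx = index[int(0.8 * len(data)):]
train, test = data[train_idx], data[test_idx]
chunks = [train[i * 100:(i + 1) * 100] for i in range(len(train) // 100)]
print(len(chunks), chunks[0].shape)                     # 4 chunks of 100 shuffled training rows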
Example #3
    def fit(self,
            obs,
            act,
            obs_next,
            epochs=1000,
            compute_normalization=True,
            verbose=False,
            valid_split_ratio=None,
            rolling_average_persitency=None,
            log_tabular=False,
            early_stopping=True):
        """
        Fits the NN dynamics model
        :param obs: observations - numpy array of shape (n_samples, ndim_obs)
        :param act: actions - numpy array of shape (n_samples, ndim_act)
        :param obs_next: observations after taking action - numpy array of shape (n_samples, ndim_obs)
        :param epochs: number of training epochs
        :param compute_normalization: boolean indicating whether normalization shall be (re-)computed given the data
        :param valid_split_ratio: relative size of validation split (float between 0.0 and 1.0)
        :param verbose: logging verbosity
        :param rolling_average_persitency: persistency (weight) of the exponential moving average of the validation loss
        :param log_tabular: boolean indicating whether to log training stats in tabular format
        :param early_stopping: boolean indicating whether to stop training once the rolling validation loss stops improving
        """
        assert obs.ndim == 2 and obs.shape[1] == self.obs_space_dims
        assert obs_next.ndim == 2 and obs_next.shape[1] == self.obs_space_dims
        assert act.ndim == 2 and act.shape[1] == self.action_space_dims

        if valid_split_ratio is None:
            valid_split_ratio = self.valid_split_ratio
        if rolling_average_persitency is None:
            rolling_average_persitency = self.rolling_average_persitency

        assert 1 > valid_split_ratio >= 0

        sess = tf.get_default_session()

        # compute state deltas and split into training and validation sets
        delta = obs_next - obs
        obs_train, act_train, delta_train, obs_test, act_test, delta_test = train_test_split(
            obs, act, delta, test_split_ratio=valid_split_ratio)

        if self._dataset_test is None:
            self._dataset_test = dict(obs=obs_test,
                                      act=act_test,
                                      delta=delta_test)
            self._dataset_train = dict(obs=obs_train,
                                       act=act_train,
                                       delta=delta_train)
        else:
            n_test_new_samples = len(obs_test)
            n_max_test = self.buffer_size - n_test_new_samples
            n_train_new_samples = len(obs_train)
            n_max_train = self.buffer_size - n_train_new_samples
            self._dataset_test['obs'] = np.concatenate(
                [self._dataset_test['obs'][-n_max_test:], obs_test])
            self._dataset_test['act'] = np.concatenate(
                [self._dataset_test['act'][-n_max_test:], act_test])
            self._dataset_test['delta'] = np.concatenate(
                [self._dataset_test['delta'][-n_max_test:], delta_test])

            self._dataset_train['obs'] = np.concatenate(
                [self._dataset_train['obs'][-n_max_train:], obs_train])
            self._dataset_train['act'] = np.concatenate(
                [self._dataset_train['act'][-n_max_train:], act_train])
            self._dataset_train['delta'] = np.concatenate(
                [self._dataset_train['delta'][-n_max_train:], delta_train])

        # create data queue
        if self.next_batch is None:
            self.next_batch, self.iterator = self._data_input_fn(
                self._dataset_train['obs'],
                self._dataset_train['act'],
                self._dataset_train['delta'],
                batch_size=self.batch_size,
                buffer_size=self.buffer_size)

        valid_loss_rolling_average = None

        if (self.normalization is None
                or compute_normalization) and self.normalize_input:
            self.compute_normalization(self._dataset_train['obs'],
                                       self._dataset_train['act'],
                                       self._dataset_train['delta'])

        if self.normalize_input:
            # normalize data
            obs_train, act_train, delta_train = self._normalize_data(
                self._dataset_train['obs'], self._dataset_train['act'],
                self._dataset_train['delta'])
            assert obs_train.ndim == act_train.ndim == delta_train.ndim == 2
        else:
            obs_train = self._dataset_train['obs']
            act_train = self._dataset_train['act']
            delta_train = self._dataset_train['delta']

        # Training loop
        for epoch in range(epochs):

            # initialize data queue
            sess.run(self.iterator.initializer,
                     feed_dict={
                         self.obs_dataset_ph: obs_train,
                         self.act_dataset_ph: act_train,
                         self.delta_dataset_ph: delta_train
                     })

            batch_losses = []
            while True:
                try:
                    obs_batch, act_batch, delta_batch = sess.run(
                        self.next_batch)

                    # run train op
                    batch_loss, _ = sess.run(
                        [self.loss, self.train_op],
                        feed_dict={
                            self.obs_ph: obs_batch,
                            self.act_ph: act_batch,
                            self.delta_ph: delta_batch
                        })

                    batch_losses.append(batch_loss)

                except tf.errors.OutOfRangeError:
                    # compute validation loss
                    if self.normalize_input:
                        # normalize data
                        obs_test, act_test, delta_test = self._normalize_data(
                            self._dataset_test['obs'],
                            self._dataset_test['act'],
                            self._dataset_test['delta'])
                        assert obs_test.ndim == act_test.ndim == delta_test.ndim == 2
                    else:
                        obs_test = self._dataset_test['obs']
                        act_test = self._dataset_test['act']
                        delta_test = self._dataset_test['delta']

                    valid_loss = sess.run(self.loss,
                                          feed_dict={
                                              self.obs_ph: obs_test,
                                              self.act_ph: act_test,
                                              self.delta_ph: delta_test
                                          })
                    if valid_loss_rolling_average is None:
                        valid_loss_rolling_average = 1.5 * valid_loss  # start the rolling average high to avoid stopping too early
                        valid_loss_rolling_average_prev = 2.0 * valid_loss

                    valid_loss_rolling_average = rolling_average_persitency * valid_loss_rolling_average + (
                        1.0 - rolling_average_persitency) * valid_loss

                    if verbose:
                        logger.log(
                            "Training NNDynamicsModel - finished epoch %i -- train loss: %.4f  valid loss: %.4f  valid_loss_mov_avg: %.4f"
                            % (epoch, float(np.mean(batch_losses)), valid_loss,
                               valid_loss_rolling_average))
                    break

            if early_stopping and valid_loss_rolling_average_prev < valid_loss_rolling_average:
                logger.log(
                    'Early stopping NNDynamicsModel training since valid_loss_rolling_average increased'
                )
                break
            valid_loss_rolling_average_prev = valid_loss_rolling_average
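The early-stopping rule in fit() keeps an exponential moving average of the validation loss and stops once that average increases between epochs. Here is a self-contained sketch of just that rule, with synthetic per-epoch losses and a deliberately small persistency so the stop actually triggers (the real code takes the persistency from rolling_average_persitency):

import numpy as np

persistency = 0.5                                        # illustrative value only
valid_losses = [1.0, 0.8, 0.7, 0.65, 0.66, 0.7, 0.8]     # synthetic per-epoch validation losses
rolling = 1.5 * valid_losses[0]                          # start the rolling average high
rolling_prev = 2.0 * valid_losses[0]
for epoch, loss in enumerate(valid_losses):
    rolling = persistency * rolling + (1.0 - persistency) * loss
    if rolling_prev < rolling:                           # rolling average increased -> stop
        print('early stopping at epoch', epoch)
        break
    rolling_prev = rolling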
Example #4
def main(**kwargs):
    z_dim = kwargs['z_dim']
    trans_mode = kwargs['trans_mode']
    epochs = kwargs['epochs']
    include_action = kwargs['include_action']
    label = kwargs['label']

    dataset = kwargs['data_path']
    feature_dims = kwargs['feature_dims']
    mode = kwargs['mode']
    n = kwargs['n']
    k = kwargs['k']
    encoder_lr = kwargs['encoder_lr']
    decoder_lr = kwargs['decoder_lr']
    decoder_feature_dims = kwargs['decoder_feature_dims']
    process_type = kwargs['process_type']

    if kwargs['data_path'] == '../dataset/sequence/HandManipulateEgg-v0/5seeds-dict.pickle':
        kwargs['dataset'] = 'trained_5seeds'
    elif kwargs['data_path'] == '../dataset/untrained/HandManipulateEgg-v0/5seeds-dict.pickle':
        kwargs['dataset'] = 'untrained_5seeds'
    elif kwargs['data_path'] == '../dataset/HandManipulateEgg-v09-dict.pickle':
        kwargs['dataset'] = 'trained_1seed'
    exp_dir = os.getcwd() + '/data/' + EXP_NAME + '/' + str(kwargs['seed'])
    if kwargs['debug']:
        save_dir = '../saved_cpc/' + str(label) + '/' +  str(kwargs['normalize_data']) + '/' + str(process_type)+ '/trained/debug'
        # save_dir = '../saved_cpc/' + str(label) + '/' + str(process_type)+ '/trained/debug'
    else:
        save_dir = '../saved_cpc/' + str(label) + '/' +  str(kwargs['normalize_data']) + '/' + str(process_type)+ '/trained'
        # save_dir = '../saved_cpc/' + str(label) + '/' + str(process_type)+ '/trained'
    logger.configure(dir=exp_dir, format_strs=['stdout', 'log', 'csv'], snapshot_mode='last')
    json.dump(kwargs, open(exp_dir + '/params.json', 'w'), indent=2, sort_keys=True, cls=ClassEncoder)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = kwargs.get('gpu_frac', 0.95)
    sess = tf.Session(config=config)

    obs, acts, fixed_num_of_contact = pickle.load(open(dataset, 'rb'))

    env = gym.make(kwargs['env'],
                   obs_type = kwargs['obs_type'],
                   fixed_num_of_contact = [fixed_num_of_contact, True])

    ngeoms = env.sim.model.ngeom
    obs, object_info = expand_data(obs, ngeoms, fixed_num_of_contact)
    if kwargs['normalize_data']:
        obs = normalize_obs(obs)
    next_obs = obs[:, 1:]
    obs = obs[:, :-1]
    N, L, _, contact_point_dim = obs.shape
    N, L, action_dim = acts.shape

    obs_dim = (fixed_num_of_contact, contact_point_dim)
    train_data, test_data = split_data([obs, acts, next_obs, object_info])

    batch_size = 2

    if mode in ['restore', 'store_weights']:
        saver = tf.train.import_meta_graph(save_dir + '-999.meta')
        pur_save_dir = save_dir[:-8]
        saver.restore(sess, tf.train.latest_checkpoint(pur_save_dir))
        graph = tf.get_default_graph()

    with sess.as_default() as sess:
        encoder = Encoder(z_dim,
                          fixed_num_of_contact,
                          contact_point_dim,
                          feature_dims)
        trans = Transition(z_dim, action_dim, mode = trans_mode)
        cpc = CPC(sess,
                  encoder,
                  trans,
                  encoder_lr,
                  fixed_num_of_contact,
                  contact_point_dim,
                  action_dim,
                  include_action = include_action,
                  type = 1*(label=='cpc1') + 2*(label=='cpc2'),
                  n_neg = n,
                  process_type = process_type,
                  mode = mode)

        cpc_epochs, decoder_epochs = epochs
        if mode == 'train':
            sess.run(tf.global_variables_initializer())
            logger.log("training started")
            for epoch in range(cpc_epochs):
                train_cpc(cpc, epoch, train_data, batch_size, n, k)
                test_cpc(cpc, epoch, test_data, batch_size, n, k)

                logger.logkv("epoch", epoch)
                logger.dumpkvs()
            cpc.save_model(save_dir, 999)

            """decoder"""
            logger.log("Done with cpc training.")

            decoder = Decoder(cpc,
                              sess,
                              z_dim,
                              decoder_feature_dims,
                              fixed_num_of_contact,
                              contact_point_dim,
                              decoder_lr)
            uninit_vars = [var for var in tf.global_variables() if not sess.run(tf.is_variable_initialized(var))]
            sess.run(tf.variables_initializer(uninit_vars))
            for epoch in range(decoder_epochs):
                train_decoder(decoder, epoch, train_data, batch_size, n, k)
                test_decoder(decoder, epoch, test_data, batch_size, n, k)

                logger.logkv("epoch", (epoch + cpc_epochs))
                logger.dumpkvs()
            print("model saved in", save_dir)

        elif mode == 'restore':
            decoder = Decoder(cpc,
                              sess,
                              z_dim,
                              decoder_feature_dims,
                              fixed_num_of_contact,
                              contact_point_dim,
                              decoder_lr)
            uninit_vars = [var for var in tf.global_variables() if not sess.run(tf.is_variable_initialized(var))]
            sess.run(tf.variables_initializer(uninit_vars))
            print("initialized")
            for epoch in range(100):
                train_decoder(decoder, epoch, train_data, batch_size, n, k)
                test_decoder(decoder, epoch, test_data, batch_size, n, k)

                logger.logkv("epoch", epoch)
                logger.dumpkvs()
                print("logging to", exp_dir)

        elif mode == 'store_weights':
            old = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='')
            old = sess.run(old)
            save_dir = './saved_model/' +  str(label) + '/' + str(process_type)+ '/trained/'
            with open(save_dir + 'weights.pickle', 'wb') as pickle_file:
                pickle.dump(old, pickle_file)
            print("weights saved to", save_dir)

            save_dir = '/home/vioichigo/try/tactile-baselines/saved_model/cpc2/trained'
            with open(save_dir + '/params.pickle', 'wb') as pickle_file:
                pickle.dump([z_dim, fixed_num_of_contact, contact_point_dim, action_dim, encoder_lr, feature_dims, trans_mode, label, include_action], pickle_file)

        tf.reset_default_graph()
        print("graph reset successfully")
Example #5
    def save_model(self, model_dir, i=999):
        saver = tf.train.Saver()
        saver.save(self.sess, model_dir, global_step=i)
        logger.log("saved successfully")
Example #6
    def train(self):
        """
        Trains policy on env using algo
        Pseudocode:
            for itr in n_itr:
                for step in num_inner_grad_steps:
                    sampler.sample()
                    algo.compute_updated_dists()
                algo.optimize_policy()
                sampler.update_goals()
        """

        with self.sess.as_default() as sess:
            # initialize variables (note: global_variables_initializer re-initializes all variables, including any that were loaded)
            sess.run(tf.global_variables_initializer())
            start_time = time.time()

            if self.start_itr == 0:
                self.algo._update_target(tau=1.0)
                if self.n_initial_exploration_steps > 0:
                    while self.replay_buffer._size < self.n_initial_exploration_steps:
                        paths = self.sampler.obtain_samples(
                            log=True, log_prefix='train-', random=True)
                        samples_data = self.sample_processor.process_samples(
                            paths, log='all', log_prefix='train-')[0]
                        self.replay_buffer.add_samples(
                            samples_data['observations'],
                            samples_data['actions'],
                            samples_data['rewards'],
                            samples_data['dones'],
                            samples_data['next_observations'],
                        )

            for itr in range(self.start_itr, self.n_itr):
                itr_start_time = time.time()
                logger.log(
                    "\n ---------------- Iteration %d ----------------" % itr)
                logger.log(
                    "Sampling set of tasks/goals for this meta-batch...")
                """ -------------------- Sampling --------------------------"""

                logger.log("Obtaining samples...")
                time_env_sampling_start = time.time()
                paths = self.sampler.obtain_samples(log=True,
                                                    log_prefix='train-')
                sampling_time = time.time() - time_env_sampling_start
                """ ----------------- Processing Samples ---------------------"""
                # check how the samples are processed
                logger.log("Processing samples...")
                time_proc_samples_start = time.time()
                samples_data = self.sample_processor.process_samples(
                    paths, log='all', log_prefix='train-')[0]
                self.replay_buffer.add_samples(
                    samples_data['observations'],
                    samples_data['actions'],
                    samples_data['rewards'],
                    samples_data['dones'],
                    samples_data['next_observations'],
                )
                proc_samples_time = time.time() - time_proc_samples_start

                paths = self.sampler.obtain_samples(log=True,
                                                    log_prefix='eval-',
                                                    deterministic=True)
                _ = self.sample_processor.process_samples(
                    paths, log='all', log_prefix='eval-')[0]

                # self.log_diagnostics(paths, prefix='train-')
                """ ------------------ Policy Update ---------------------"""

                logger.log("Optimizing policy...")

                # This needs to take all samples_data so that it can construct graph for meta-optimization.
                time_optimization_step_start = time.time()

                self.algo.optimize_policy(self.replay_buffer,
                                          itr * self.epoch_length,
                                          self.num_grad_steps)
                """ ------------------- Logging Stuff --------------------------"""
                logger.logkv('Itr', itr)
                logger.logkv('n_timesteps',
                             self.sampler.total_timesteps_sampled)

                logger.logkv('Time-Optimization',
                             time.time() - time_optimization_step_start)
                logger.logkv('Time-SampleProc', np.sum(proc_samples_time))
                logger.logkv('Time-Sampling', sampling_time)

                logger.logkv('Time', time.time() - start_time)
                logger.logkv('ItrTime', time.time() - itr_start_time)

                logger.dumpkvs()
                if itr == 0:
                    sess.graph.finalize()

        logger.log("Training finished")
        self.sess.close()
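Before the main loop, train() fills the replay buffer with randomly sampled transitions until it holds at least n_initial_exploration_steps of them. A schematic, self-contained sketch of that warm-up logic with stand-in classes (ReplayBuffer and sample_random_transitions are placeholders, not the original API):

import numpy as np

class ReplayBuffer:
    """Stand-in replay buffer that just stores transitions in a list."""
    def __init__(self):
        self._storage = []
    @property
    def size(self):
        return len(self._storage)
    def add_samples(self, transitions):
        self._storage.extend(transitions)

def sample_random_transitions(batch_size=32):
    # placeholder for sampler.obtain_samples(random=True)
    return [np.random.randn(4) for _ in range(batch_size)]

replay_buffer = ReplayBuffer()
n_initial_exploration_steps = 100
while replay_buffer.size < n_initial_exploration_steps:
    replay_buffer.add_samples(sample_random_transitions())
print('warm-up finished with', replay_buffer.size, 'transitions')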
Example #7
    def fit(self,
            obs,
            act,
            obs_next,
            epochs=1000,
            compute_normalization=True,
            valid_split_ratio=None,
            rolling_average_persitency=None,
            verbose=False,
            log_tabular=False,
            prefix=''):
        """
        Fits the NN dynamics model
        :param obs: observations - numpy array of shape (n_samples, ndim_obs)
        :param act: actions - numpy array of shape (n_samples, ndim_act)
        :param obs_next: observations after taking action - numpy array of shape (n_samples, ndim_obs)
        :param epochs: number of training epochs
        :param compute_normalization: boolean indicating whether normalization shall be (re-)computed given the data
        :param valid_split_ratio: relative size of validation split (float between 0.0 and 1.0)
        :param rolling_average_persitency: persistency (weight) of the exponential moving average of the validation loss
        :param verbose: logging verbosity
        :param log_tabular: boolean indicating whether to log training stats in tabular format
        :param prefix: prefix prepended to the tabular logging keys
        """

        if obs is not None:
            self.update_buffer(obs, act, obs_next, valid_split_ratio,
                               compute_normalization)

        if rolling_average_persitency is None:
            rolling_average_persitency = self.rolling_average_persitency

        sess = tf.get_default_session()

        if compute_normalization and self.normalize_input:
            self.compute_normalization(self._dataset_train['obs'],
                                       self._dataset_train['act'],
                                       self._dataset_train['delta'])

        if self.normalize_input:
            # normalize data
            obs_train, act_train, delta_train = self._normalize_data(
                self._dataset_train['obs'], self._dataset_train['act'],
                self._dataset_train['delta'])
        else:
            obs_train, act_train, delta_train = self._dataset_train['obs'], self._dataset_train['act'],\
                                                self._dataset_train['delta']

        valid_loss_rolling_average = None
        train_op_to_do = self.train_op_model_batches
        idx_to_remove = []
        epoch_times = []
        epochs_per_model = []
        """ ------- Looping over training epochs ------- """
        for epoch in range(epochs):

            # initialize data queue
            feed_dict = dict(
                list(zip(self.obs_batches_dataset_ph, obs_train)) +
                list(zip(self.act_batches_dataset_ph, act_train)) +
                list(zip(self.delta_batches_dataset_ph, delta_train)))
            sess.run(self.iterator.initializer, feed_dict=feed_dict)

            # preparations for recording training stats
            epoch_start_time = time.time()
            batch_losses = []
            """ ------- Looping through the shuffled and batched dataset for one epoch -------"""
            while True:
                try:
                    obs_act_delta = sess.run(self.next_batch)
                    obs_batch_stack = np.concatenate(
                        obs_act_delta[:self.num_models], axis=0)
                    act_batch_stack = np.concatenate(
                        obs_act_delta[self.num_models:2 * self.num_models],
                        axis=0)
                    delta_batch_stack = np.concatenate(
                        obs_act_delta[2 * self.num_models:], axis=0)

                    # run train op
                    batch_loss_train_ops = sess.run(
                        self.loss_model_batches + train_op_to_do,
                        feed_dict={
                            self.obs_model_batches_stack_ph: obs_batch_stack,
                            self.act_model_batches_stack_ph: act_batch_stack,
                            self.delta_model_batches_stack_ph:
                            delta_batch_stack
                        })

                    batch_loss = np.array(
                        batch_loss_train_ops[:self.num_models])
                    batch_losses.append(batch_loss)

                except tf.errors.OutOfRangeError:
                    if self.normalize_input:
                        # normalize data
                        obs_test, act_test, delta_test = self._normalize_data(
                            self._dataset_test['obs'],
                            self._dataset_test['act'],
                            self._dataset_test['delta'])

                    else:
                        obs_test, act_test, delta_test = self._dataset_test['obs'], self._dataset_test['act'], \
                                                         self._dataset_test['delta']

                    obs_test_stack = np.concatenate(obs_test, axis=0)
                    act_test_stack = np.concatenate(act_test, axis=0)
                    delta_test_stack = np.concatenate(delta_test, axis=0)

                    # compute validation loss
                    valid_loss = sess.run(
                        self.loss_model_batches,
                        feed_dict={
                            self.obs_model_batches_stack_ph: obs_test_stack,
                            self.act_model_batches_stack_ph: act_test_stack,
                            self.delta_model_batches_stack_ph: delta_test_stack
                        })
                    valid_loss = np.array(valid_loss)
                    if valid_loss_rolling_average is None:
                        valid_loss_rolling_average = 1.5 * valid_loss  # start the rolling average high to avoid stopping too early
                        valid_loss_rolling_average_prev = 2.0 * valid_loss
                        for i in range(len(valid_loss)):
                            if valid_loss[i] < 0:
                                valid_loss_rolling_average[i] = valid_loss[i] / 1.5  # for negative losses this also biases the start upwards
                                valid_loss_rolling_average_prev[i] = valid_loss[i] / 2.0

                    valid_loss_rolling_average = rolling_average_persitency*valid_loss_rolling_average \
                                                 + (1.0-rolling_average_persitency)*valid_loss

                    if verbose:
                        str_mean_batch_losses = ' '.join([
                            '%.4f' % x for x in np.mean(batch_losses, axis=0)
                        ])
                        str_valid_loss = ' '.join(
                            ['%.4f' % x for x in valid_loss])
                        str_valid_loss_rolling_averge = ' '.join(
                            ['%.4f' % x for x in valid_loss_rolling_average])
                        logger.log(
                            "Training NNDynamicsModel - finished epoch %i --\n"
                            "train loss: %s\nvalid loss: %s\nvalid_loss_mov_avg: %s"
                            % (epoch, str_mean_batch_losses, str_valid_loss,
                               str_valid_loss_rolling_averge))
                    break

            for i in range(self.num_models):
                if (valid_loss_rolling_average_prev[i] <
                        valid_loss_rolling_average[i]
                        or epoch == epochs - 1) and i not in idx_to_remove:
                    idx_to_remove.append(i)
                    epochs_per_model.append(epoch)
                    if epoch < epochs - 1:
                        logger.log(
                            'At Epoch {}, stop model {} since its valid_loss_rolling_average increased'
                            .format(epoch, i))

            train_op_to_do = [
                op for idx, op in enumerate(self.train_op_model_batches)
                if idx not in idx_to_remove
            ]

            if not idx_to_remove:
                epoch_times.append(
                    time.time() - epoch_start_time
                )  # only track epoch times while all models are trained

            if not train_op_to_do:
                if verbose and epoch < epochs - 1:
                    logger.log(
                        'Stopping all DynamicsEnsemble Training before reaching max_num_epochs'
                    )
                break
            valid_loss_rolling_average_prev = valid_loss_rolling_average
        """ ------- Tabular Logging ------- """
        if log_tabular:
            logger.logkv(prefix + 'AvgModelEpochTime', np.mean(epoch_times))
            assert len(epochs_per_model) == self.num_models
            logger.logkv(prefix + 'AvgEpochs', np.mean(epochs_per_model))
            logger.logkv(prefix + 'StdEpochs', np.std(epochs_per_model))
            logger.logkv(prefix + 'MaxEpochs', np.max(epochs_per_model))
            logger.logkv(prefix + 'MinEpochs', np.min(epochs_per_model))
            logger.logkv(prefix + 'AvgFinalTrainLoss', np.mean(batch_losses))
            logger.logkv(prefix + 'AvgFinalValidLoss', np.mean(valid_loss))
            logger.logkv(prefix + 'AvgFinalValidLossRoll',
                         np.mean(valid_loss_rolling_average))
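In the ensemble variant of fit(), each model keeps its own rolling validation loss and is dropped from the list of train ops as soon as its rolling average increases (or the last epoch is reached). The following self-contained sketch simulates that per-model early stopping with synthetic losses (not the original code):

import numpy as np

num_models, persistency = 3, 0.5
remaining = list(range(num_models))                 # indices of models still being trained
rolling = None
for epoch in range(10):
    # synthetic per-model validation losses: model 0 improves, model 1 degrades, model 2 stalls
    valid_loss = np.array([1.0 / (epoch + 1), 0.5 + 0.05 * epoch, 0.8])
    if rolling is None:
        rolling, rolling_prev = 1.5 * valid_loss, 2.0 * valid_loss
    rolling = persistency * rolling + (1.0 - persistency) * valid_loss
    for i in list(remaining):                       # iterate over a copy while removing
        if rolling_prev[i] < rolling[i]:
            remaining.remove(i)
            print('epoch', epoch, '- stop training model', i)
    if not remaining:
        break
    rolling_prev = rolling
print('models still training at the end:', remaining)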
Example #8
    def fit_one_epoch(self,
                      remaining_model_idx,
                      valid_loss_rolling_average_prev,
                      with_new_data,
                      compute_normalization=True,
                      rolling_average_persitency=None,
                      verbose=False,
                      log_tabular=False,
                      prefix=''):

        if rolling_average_persitency is None:
            rolling_average_persitency = self.rolling_average_persitency

        sess = tf.get_default_session()

        if with_new_data:
            if compute_normalization and self.normalize_input:
                self.compute_normalization(self._dataset_train['obs'],
                                           self._dataset_train['act'],
                                           self._dataset_train['delta'])

        self.used_timesteps_counter += len(self._dataset_train['obs'][0])
        if self.normalize_input:
            # normalize data
            obs_train, act_train, delta_train = self._normalize_data(
                self._dataset_train['obs'], self._dataset_train['act'],
                self._dataset_train['delta'])
        else:
            obs_train, act_train, delta_train = self._dataset_train['obs'], self._dataset_train['act'], \
                                                self._dataset_train['delta']

        valid_loss_rolling_average = valid_loss_rolling_average_prev
        assert remaining_model_idx is not None
        train_op_to_do = [
            op for idx, op in enumerate(self.train_op_model_batches)
            if idx in remaining_model_idx
        ]

        # initialize data queue
        feed_dict = dict(
            list(zip(self.obs_batches_dataset_ph, obs_train)) +
            list(zip(self.act_batches_dataset_ph, act_train)) +
            list(zip(self.delta_batches_dataset_ph, delta_train)))
        sess.run(self.iterator.initializer, feed_dict=feed_dict)

        # preparations for recording training stats
        batch_losses = []
        """ ------- Looping through the shuffled and batched dataset for one epoch -------"""
        while True:
            try:
                obs_act_delta = sess.run(self.next_batch)
                obs_batch_stack = np.concatenate(
                    obs_act_delta[:self.num_models], axis=0)
                act_batch_stack = np.concatenate(
                    obs_act_delta[self.num_models:2 * self.num_models], axis=0)
                delta_batch_stack = np.concatenate(
                    obs_act_delta[2 * self.num_models:], axis=0)

                # run train op
                batch_loss_train_ops = sess.run(
                    self.loss_model_batches + train_op_to_do,
                    feed_dict={
                        self.obs_model_batches_stack_ph: obs_batch_stack,
                        self.act_model_batches_stack_ph: act_batch_stack,
                        self.delta_model_batches_stack_ph: delta_batch_stack
                    })

                batch_loss = np.array(batch_loss_train_ops[:self.num_models])
                batch_losses.append(batch_loss)

            except tf.errors.OutOfRangeError:
                if self.normalize_input:
                    # TODO: if not with_new_data, don't recompute
                    # normalize data
                    obs_test, act_test, delta_test = self._normalize_data(
                        self._dataset_test['obs'], self._dataset_test['act'],
                        self._dataset_test['delta'])

                else:
                    obs_test, act_test, delta_test = self._dataset_test['obs'], self._dataset_test['act'], \
                                                     self._dataset_test['delta']

                obs_test_stack = np.concatenate(obs_test, axis=0)
                act_test_stack = np.concatenate(act_test, axis=0)
                delta_test_stack = np.concatenate(delta_test, axis=0)

                # compute validation loss
                valid_loss = sess.run(self.loss_model_batches,
                                      feed_dict={
                                          self.obs_model_batches_stack_ph:
                                          obs_test_stack,
                                          self.act_model_batches_stack_ph:
                                          act_test_stack,
                                          self.delta_model_batches_stack_ph:
                                          delta_test_stack
                                      })
                valid_loss = np.array(valid_loss)

                if valid_loss_rolling_average is None:
                    valid_loss_rolling_average = 1.5 * valid_loss  # start the rolling average high to avoid stopping too early
                    valid_loss_rolling_average_prev = 2.0 * valid_loss
                    for i in range(len(valid_loss)):
                        if valid_loss[i] < 0:
                            valid_loss_rolling_average[i] = valid_loss[i] / 1.5  # for negative losses this also biases the start upwards
                            valid_loss_rolling_average_prev[i] = valid_loss[i] / 2.0

                valid_loss_rolling_average = rolling_average_persitency*valid_loss_rolling_average \
                                             + (1.0-rolling_average_persitency)*valid_loss

                if verbose:
                    str_mean_batch_losses = ' '.join(
                        ['%.4f' % x for x in np.mean(batch_losses, axis=0)])
                    str_valid_loss = ' '.join(['%.4f' % x for x in valid_loss])
                    str_valid_loss_rolling_averge = ' '.join(
                        ['%.4f' % x for x in valid_loss_rolling_average])
                    logger.log(
                        "Training NNDynamicsModel - finished one epoch\n"
                        "train loss: %s\nvalid loss: %s\nvalid_loss_mov_avg: %s"
                        % (str_mean_batch_losses, str_valid_loss,
                           str_valid_loss_rolling_averge))
                break

        for i in list(remaining_model_idx):  # iterate over a copy since we mutate the list below
            if valid_loss_rolling_average_prev[i] < valid_loss_rolling_average[i]:
                remaining_model_idx.remove(i)
                logger.log(
                    'Stop model {} since its valid_loss_rolling_average increased'
                    .format(i))
        """ ------- Tabular Logging ------- """
        if log_tabular:
            logger.logkv(prefix + 'TimeStepsCtr', self.timesteps_counter)
            logger.logkv(prefix + 'UsedTimeStepsCtr',
                         self.used_timesteps_counter)
            logger.logkv(prefix + 'AvgSampleUsage',
                         self.used_timesteps_counter / self.timesteps_counter)
            logger.logkv(prefix + 'NumModelRemaining',
                         len(remaining_model_idx))
            logger.logkv(prefix + 'AvgTrainLoss', np.mean(batch_losses))
            logger.logkv(prefix + 'AvgValidLoss', np.mean(valid_loss))
            logger.logkv(prefix + 'AvgValidLossRoll',
                         np.mean(valid_loss_rolling_average))

        return remaining_model_idx, valid_loss_rolling_average
Example #9
    def update_buffer(self,
                      obs,
                      act,
                      obs_next,
                      valid_split_ratio=None,
                      check_init=True):

        assert obs.ndim == 2 and obs.shape[1] == self.obs_space_dims
        assert obs_next.ndim == 2 and obs_next.shape[1] == self.obs_space_dims
        assert act.ndim == 2 and act.shape[1] == self.action_space_dims

        self.timesteps_counter += obs.shape[0]

        if valid_split_ratio is None:
            valid_split_ratio = self.valid_split_ratio

        assert 1 > valid_split_ratio >= 0

        # split into valid and test set
        obs_train_batches = []
        act_train_batches = []
        delta_train_batches = []
        obs_test_batches = []
        act_test_batches = []
        delta_test_batches = []

        delta = obs_next - obs
        for i in range(self.num_models):
            obs_train, act_train, delta_train, obs_test, act_test, delta_test = train_test_split(
                obs, act, delta, test_split_ratio=valid_split_ratio)
            obs_train_batches.append(obs_train)
            act_train_batches.append(act_train)
            delta_train_batches.append(delta_train)
            obs_test_batches.append(obs_test)
            act_test_batches.append(act_test)
            delta_test_batches.append(delta_test)
            # create data queue

        # This branch should be entered exactly once (on the first call)
        if check_init and self._dataset_test is None:
            self._dataset_test = dict(obs=obs_test_batches,
                                      act=act_test_batches,
                                      delta=delta_test_batches)
            self._dataset_train = dict(obs=obs_train_batches,
                                       act=act_train_batches,
                                       delta=delta_train_batches)

            assert self.next_batch is None
            self.next_batch, self.iterator = self._data_input_fn(
                self._dataset_train['obs'],
                self._dataset_train['act'],
                self._dataset_train['delta'],
                batch_size=self.batch_size)
            assert self.normalization is None
            if self.normalize_input:
                self.compute_normalization(self._dataset_train['obs'],
                                           self._dataset_train['act'],
                                           self._dataset_train['delta'])
        else:
            n_test_new_samples = len(obs_test_batches[0])
            n_max_test = self.buffer_size_test - n_test_new_samples
            n_train_new_samples = len(obs_train_batches[0])
            n_max_train = self.buffer_size_train - n_train_new_samples
            for i in range(self.num_models):

                self._dataset_test['obs'][i] = np.concatenate([
                    self._dataset_test['obs'][i][-n_max_test:],
                    obs_test_batches[i]
                ])
                self._dataset_test['act'][i] = np.concatenate([
                    self._dataset_test['act'][i][-n_max_test:],
                    act_test_batches[i]
                ])
                self._dataset_test['delta'][i] = np.concatenate([
                    self._dataset_test['delta'][i][-n_max_test:],
                    delta_test_batches[i]
                ])

                self._dataset_train['obs'][i] = np.concatenate([
                    self._dataset_train['obs'][i][-n_max_train:],
                    obs_train_batches[i]
                ])
                self._dataset_train['act'][i] = np.concatenate([
                    self._dataset_train['act'][i][-n_max_train:],
                    act_train_batches[i]
                ])
                self._dataset_train['delta'][i] = np.concatenate([
                    self._dataset_train['delta'][i][-n_max_train:],
                    delta_train_batches[i]
                ])

        logger.log(
            'Model has dataset_train, dataset_test with size {}, {}'.format(
                len(self._dataset_train['obs'][0]),
                len(self._dataset_test['obs'][0])))
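update_buffer keeps the datasets bounded by concatenating only the most recent (buffer_size - n_new) old rows with the incoming batch, which discards the oldest samples first. A self-contained sketch of that FIFO truncation (synthetic data, not from the original code):

import numpy as np

buffer_size = 10
buf = np.arange(8).reshape(8, 1)            # 8 old samples already in the buffer
new = 100 + np.arange(5).reshape(5, 1)      # 5 new samples arriving
n_max = buffer_size - len(new)              # how many of the old samples we may keep
buf = np.concatenate([buf[-n_max:], new])
print(len(buf), buf.ravel())                # 10 samples: the 5 newest old rows plus the 5 new ones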