Code Example #1
 def __init__(self, dataset, model_type, loss_type, dim_input, dim_output,
              alpha, beta, K, batch_size, is_train, num_updates, norm):
     '''
     model_type: choose model type for each task, choice: ('fc',)
     loss_type:  choose the form of the objective function
     dim_input:  input dimension
     dim_output: desired output dimension
     alpha:      fixed learning rate for the inner gradient step
     beta:       learning rate used for Adam Optimizer
     K:          perform K-shot learning
     batch_size: number of tasks sampled in each iteration
     is_train:   whether to create summary/checkpoint directories for training
     num_updates: number of inner-loop gradient updates
     norm:       normalization method used in the model
     '''
     self._sess = utils.get_session(1)
     self._is_train = is_train
     self._dataset = dataset
     self._alpha = alpha
     self._K = K
     self._norm = norm
     self._dim_input = dim_input
     self._dim_output = dim_output
     self._batch_size = batch_size
     self._num_updates = num_updates
     self._meta_optimizer = tf.train.AdamOptimizer(beta)
     self._avoid_second_derivative = False
     self._task_name = 'MAML.{}_{}-shot_{}-updates_{}-batch_norm-{}'.format(
         dataset.name, self._K, self._num_updates, self._batch_size,
         self._norm)
     log.infov('Task name: {}'.format(self._task_name))
     # Build placeholder
     self._build_placeholder()
     # Build model
     model = self._import_model(model_type)
     self._construct_weights = model.construct_weights
     self._contruct_forward = model.construct_forward
     # Loss function
     self._loss_fn = self._get_loss_fn(loss_type)
     self._build_graph(dim_input, dim_output, norm=norm)
     # Misc
     self._summary_dir = os.path.join('log', self._task_name)
     self._checkpoint_dir = os.path.join('checkpoint', self._task_name)
     self._saver = tf.train.Saver(max_to_keep=10)
     if self._is_train:
         if not os.path.exists(self._summary_dir):
             os.makedirs(self._summary_dir)
         self._writer = tf.summary.FileWriter(self._summary_dir,
                                              self._sess.graph)
         if not os.path.exists(self._checkpoint_dir):
             os.makedirs(self._checkpoint_dir)
     # Initialize all variables
     log.infov("Initialize all variables")
     self._sess.run(tf.global_variables_initializer())
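The docstring above distinguishes the two learning rates: alpha drives the inner, per-task gradient step on K support examples, while beta drives the outer meta-update across the task batch. As a minimal sketch of that two-level structure, here is a first-order MAML loop on toy 1-D linear-regression tasks; the tasks, the quadratic loss, and the plain-SGD meta-update are illustrative assumptions, not code from this project (the class above uses Adam and keeps second derivatives by default).

import numpy as np

# First-order MAML sketch for a scalar weight w on toy tasks y = a * x.
alpha, beta, K, batch_size = 0.01, 0.001, 5, 4
w = 0.0
rng = np.random.default_rng(0)

for step in range(1000):
    meta_grad = 0.0
    for _ in range(batch_size):            # sample a batch of tasks
        a = rng.uniform(-2.0, 2.0)         # task parameter
        x_tr = rng.normal(size=K)          # K-shot support inputs
        x_val = rng.normal(size=K)         # query inputs
        # Inner update: one gradient step on the support loss (w*x - a*x)^2.
        grad_tr = np.mean(2.0 * (w - a) * x_tr ** 2)
        w_task = w - alpha * grad_tr
        # Outer gradient: query loss at the adapted weight, first-order
        # (the dw_task/dw term is dropped here for simplicity).
        meta_grad += np.mean(2.0 * (w_task - a) * x_val ** 2)
    w -= beta * meta_grad / batch_size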
Code Example #2
 def learn(self, batch_size, dataset, max_steps):
     for step in range(int(max_steps)):
         meta_val_loss, meta_train_loss, summary_str = self._single_train_step(
             dataset, batch_size, step)
         # Log/TF_board/Save/Evaluate
         if step % SUMMARY_FREQ == 0:
             self._writer.add_summary(summary_str, step)
         if step % LOG_FREQ == 0:
             log.info(
                 "Step: {}/{}, Meta train loss: {:.4f}, Meta val loss: {:.4f}"
                 .format(step, int(max_steps), meta_train_loss,
                         meta_val_loss))
         if step % SAVE_FREQ == 0:
             log.infov("Save checkpoint-{}".format(step))
             self._saver.save(self._sess,
                              os.path.join(self._checkpoint_dir,
                                           'checkpoint'),
                              global_step=step)
         if step % EVAL_FREQ == 0:
             self.evaluate(dataset, 100, False)
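learn relies on module-level interval constants (SUMMARY_FREQ, LOG_FREQ, SAVE_FREQ, EVAL_FREQ) that are not shown in this excerpt. A plausible definition, with the concrete values purely assumed for illustration:

# Assumed module-level intervals (values are illustrative, not from the project).
SUMMARY_FREQ = 10    # write TensorBoard summaries every 10 steps
LOG_FREQ = 100       # print meta train/val losses every 100 steps
SAVE_FREQ = 1000     # save a checkpoint every 1000 steps
EVAL_FREQ = 1000     # run evaluate(dataset, 100, False) every 1000 steps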
Code Example #3
    def evaluate(self, dataset, test_steps, draw, **kwargs):
        if not self._is_train:
            assert kwargs['restore_checkpoint'] is not None or \
                kwargs['restore_dir'] is not None
            if kwargs['restore_checkpoint'] is None:
                restore_checkpoint = tf.train.latest_checkpoint(
                    kwargs['restore_dir'])
            else:
                restore_checkpoint = kwargs['restore_checkpoint']
            self._saver.restore(self._sess, restore_checkpoint)
            log.infov('Load model: {}'.format(restore_checkpoint))
            if draw:
                draw_dir = os.path.join('vis', self._task_name)
                if not os.path.exists(draw_dir):
                    os.makedirs(draw_dir)
        accumulated_val_loss = []
        accumulated_train_loss = []
        for step in tqdm(range(test_steps)):
            output, val_loss, train_loss, amplitude, phase, inp = \
                self._single_test_step(dataset, 1)
            if not self._is_train and draw:
                # visualize one by one
                for am, ph in zip(amplitude, phase):
                    dataset.visualize(am,
                                      ph,
                                      inp[:, self._K:, :],
                                      output,
                                      path=os.path.join(
                                          draw_dir, '{}.png'.format(step)))

            accumulated_val_loss.append(val_loss)
            accumulated_train_loss.append(train_loss)
        val_loss_mean = sum(accumulated_val_loss) / test_steps
        train_loss_mean = sum(accumulated_train_loss) / test_steps
        log.infov(
            "[Evaluate] Meta train loss: {:.4f}, Meta val loss: {:.4f}".format(
                train_loss_mean, val_loss_mean))
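When the model was constructed with is_train=False, evaluate first restores a checkpoint, taking either an explicit restore_checkpoint or the latest checkpoint under restore_dir from kwargs. A usage sketch; the instance name and the directory path are assumptions (the path just follows the task-name pattern built in __init__):

# Restore the latest checkpoint from a directory, run 100 test steps,
# no visualization. Names and paths below are illustrative.
maml.evaluate(dataset,
              test_steps=100,
              draw=False,
              restore_checkpoint=None,
              restore_dir='checkpoint/MAML.sinusoid_5-shot_1-updates_4-batch_norm-None')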
Code Example #4
File: maml.py Project: Jerryxiaoyu/local_mpc
    def evaluate2(self,
                  dataset,
                  test_steps,
                  draw,
                  load_model=False,
                  task_range=(0, 7),
                  **kwargs):
        '''
        Evaluate the meta-model over a uniform distribution of tasks to get the loss.

        :param dataset:     dataset object providing evaluation tasks
        :param test_steps:  number of test steps to run
        :param draw:        whether to visualize predictions (only with load_model)
        :param load_model:  whether to restore a checkpoint before evaluating
        :param task_range:  range from which tasks are sampled
        :param kwargs:      restore_checkpoint / restore_dir for checkpoint loading
        :return:            None
        '''
        if load_model:
            assert kwargs['restore_checkpoint'] is not None or \
                kwargs['restore_dir'] is not None
            if kwargs['restore_checkpoint'] is None:
                restore_checkpoint = tf.train.latest_checkpoint(
                    kwargs['restore_dir'])
            else:
                restore_checkpoint = kwargs['restore_checkpoint']
            self._saver.restore(self._sess, restore_checkpoint)
            log.infov('Load model: {}'.format(restore_checkpoint))
        for tm in range(5):
            accumulated_val_loss = []
            accumulated_train_loss = []
            tasks = []
            for step in tqdm(range(test_steps)):
                # task = self._sample_task_fun(task_range[0], task_range[1], (1,))
                task = np.array([step * 0.05])

                output, val_loss, train_loss, x, y = self._single_test_step(
                    dataset, 1, task=task)
                if load_model and draw:
                    # visualize one by one
                    draw_dir = os.path.join(
                        self._logdir, 'vis', self._task_name,
                        'exp_' + str(step) + '_task_num' + str(task[0]) +
                        '_loss' + str(val_loss))
                    if not os.path.exists(draw_dir):
                        os.makedirs(draw_dir)
                    dataset.visualize(x[:, self._K:, :], y[:, self._K:, :],
                                      output, draw_dir)

                accumulated_val_loss.append(val_loss)
                accumulated_train_loss.append(train_loss)
                tasks.append(task)

            val_loss_mean = sum(accumulated_val_loss) / test_steps
            train_loss_mean = sum(accumulated_train_loss) / test_steps
            log.infov(
                "[Evaluate] Meta train loss: {:.4f}, Meta val loss: {:.4f}".
                format(train_loss_mean, val_loss_mean))
            data_tmp = np.array(accumulated_val_loss).reshape((-1, 1))
            if tm == 0:
                data2 = data_tmp
            else:
                data2 = np.concatenate((data2, data_tmp), axis=1)

        log_dir = os.path.join('vis', self._task_name)
        logger = Logger(log_dir, csvname='log')
        data1 = np.array(tasks)

        data = np.concatenate((data1, data2), axis=1)
        logger.log_table2csv(data)
Code Example #5
File: maml.py Project: Jerryxiaoyu/local_mpc
    def evaluate(self,
                 dataset,
                 test_steps,
                 draw,
                 load_model=False,
                 task_range=(0, 7),
                 task_type='rand',
                 **kwargs):
        if load_model:
            assert kwargs['restore_checkpoint'] is not None or \
                kwargs['restore_dir'] is not None
            if kwargs['restore_checkpoint'] is None:
                restore_checkpoint = tf.train.latest_checkpoint(
                    kwargs['restore_dir'])
            else:
                restore_checkpoint = kwargs['restore_checkpoint']
            self._saver.restore(self._sess, restore_checkpoint)
            log.infov('Load model: {}'.format(restore_checkpoint))

        tasks = []
        accumulated_val_loss, accumulated_val_loss_post = [], []
        accumulated_train_loss = []
        for step in tqdm(range(test_steps)):
            if task_type == 'rand':
                task = self._sample_task_fun(task_range[0], task_range[1],
                                             (1, ))
            elif task_type == 'lin':
                task = np.array([
                    0 + step * 0.05,
                ])
            else:
                raise ValueError('unknown task_type: {}'.format(task_type))

            output, val_loss, train_loss, x, y = self._single_test_step(
                dataset, num_tasks=1, task=task)

            if load_model and draw:
                # visualize one by one
                draw_dir = os.path.join(
                    self._logdir, 'vis', self._task_name, 'exp_' + str(step) +
                    '_task_num' + str(task[0]) + '_loss' + str(val_loss))
                if not os.path.exists(draw_dir):
                    os.makedirs(draw_dir)
                dataset.visualize(x[:, self._K:, :], y[:, self._K:, :], output,
                                  draw_dir)

            accumulated_val_loss.append(val_loss)

            accumulated_train_loss.append(train_loss)
            tasks.append(task)
        val_loss_mean = sum(accumulated_val_loss) / test_steps
        train_loss_mean = sum(accumulated_train_loss) / test_steps
        log.infov(
            "[Evaluate] Meta train loss: {:.4f}, Meta val loss: {:.4f}".format(
                train_loss_mean, val_loss_mean))
        if load_model:
            logger = Logger(self._logdir, csvname='log_test_loss')
            for i in range(test_steps):
                logger.log({
                    'num': i,
                    'task': tasks[i][0],
                    'val_loss': accumulated_val_loss[i],
                    'train_loss': accumulated_train_loss[i],
                })
                logger.write(display=False)
            logger.close()
        return train_loss_mean, val_loss_mean
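task_type controls how evaluation tasks are produced: 'rand' samples from self._sample_task_fun over task_range, while 'lin' sweeps the task value deterministically as 0.05 * step. A usage sketch; the instance name and argument values are assumptions:

# Random tasks drawn from the task distribution over the default range (0, 7):
model.evaluate(dataset, test_steps=50, draw=False, task_type='rand')

# Deterministic linear sweep: step k evaluates the task value 0.05 * k:
model.evaluate(dataset, test_steps=50, draw=False, task_type='lin')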
Code Example #6
File: maml.py Project: Jerryxiaoyu/local_mpc
    def learn(self,
              batch_size,
              dataset,
              max_epochs,
              is_PreTrain=False,
              **kwargs):
        # collect data
        dataset.get_dataset(resample=True,
                            task=None,
                            controller='Rand',
                            task_range=self._task_range,
                            task_fun=self._sample_task_fun)
        data_x, data_y, LengthOfData = dataset.get_batch(batch_size)

        # load data
        data_x_placeholder = tf.placeholder(tf.float32, data_x.shape)
        data_y_placeholder = tf.placeholder(tf.float32, data_y.shape)
        dataset_tf = tf.data.Dataset.from_tensor_slices(
            (data_x_placeholder, data_y_placeholder)).shuffle(
                buffer_size=10000).batch(batch_size).repeat()
        # create data iterator
        # `iterator` avoids shadowing the built-in iter(); alternative: make_one_shot_iterator()
        iterator = dataset_tf.make_initializable_iterator()
        train_loader = iterator.get_next()
        # init data
        self._sess.run(iterator.initializer,
                       feed_dict={
                           data_x_placeholder: data_x,
                           data_y_placeholder: data_y
                       })

        logger = Logger(self._logdir, csvname='log_loss')

        if is_PreTrain:
            input_tensors = [
                self._pretrain_op, self._summary_op, self._meta_val_loss,
                self._meta_train_loss, self.total_loss1, self.total_losses2
            ]
        else:
            input_tensors = [
                self._meta_train_op, self._summary_op, self._meta_val_loss,
                self._meta_train_loss, self.total_loss1, self.total_losses2
            ]
        for epoch in range(max_epochs):
            for i in range(int(LengthOfData / batch_size)):

                (batch_input, batch_target) = self._sess.run(train_loader)

                feed_dict = {
                    self._meta_train_x: batch_input[:, :self._K, :],
                    self._meta_train_y: batch_target[:, :self._K, :],
                    self._meta_val_x: batch_input[:, self._K:, :],
                    self._meta_val_y: batch_target[:, self._K:, :]
                }

                _, summary_str, meta_val_loss, meta_train_loss, meta_loss1, meta_losses2 = \
                    self._sess.run(input_tensors, feed_dict)

                if i % LOG_FREQ == 0:
                    log.info(
                        "Epoch: {}/{} Step: {}/{}, Meta train loss: {:.4f}, Meta val loss: {:.4f}"
                        .format(epoch, max_epochs, i,
                                int(LengthOfData / batch_size),
                                meta_train_loss, meta_val_loss))

            # Log/TF_board/Save/Evaluate
            if epoch % SUMMARY_FREQ == 0:
                self._writer.add_summary(summary_str, epoch)
            # if epoch % LOG_FREQ == 0:
            #     log.info("Step: {}/{}, Meta train loss: {:.4f}, Meta val loss: {:.4f}".format(
            #         epoch, int(max_epochs), meta_train_loss, meta_val_loss))

            if (epoch + 1) % SAVE_FREQ == 0:
                log.infov("Save checkpoint-{}".format(epoch))
                self._saver.save(self._sess,
                                 os.path.join(self._checkpoint_dir,
                                              'checkpoint'),
                                 global_step=epoch)
            if (epoch + 1) % EVAL_FREQ == 0:
                train_loss_mean, val_loss_mean = self.evaluate(
                    dataset, 2, False, task_range=self._task_range)
                logger.log({
                    'epoch': epoch,
                    'meta_val_loss': meta_val_loss,
                    'meta_train_loss': meta_train_loss,
                    'meta_loss1': meta_loss1,
                    'meta_loss2': meta_losses2,
                    'val_train_loss_mean': train_loss_mean,
                    'val_val_loss_mean': val_loss_mean,
                })
                logger.write(display=False)
        # close logger
        logger.close()
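The feed dict above encodes the K-shot split: along each task's example axis, the first K points form the meta-train (support) set and the remainder the meta-val (query) set. A small NumPy illustration of that slicing convention; the shapes are assumptions:

import numpy as np

K = 5
batch_input = np.random.randn(4, 15, 3)   # (tasks, examples, features), assumed

support = batch_input[:, :K, :]   # (4, 5, 3)  -> fed as _meta_train_x
query = batch_input[:, K:, :]     # (4, 10, 3) -> fed as _meta_val_x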
Code Example #7
File: Dynamics.py Project: Jerryxiaoyu/local_mpc
    def __init__(self,
                 env_name,
                 NumOfExp,
                 model_type,
                 loss_type,
                 dim_input,
                 dim_output,
                 beta,
                 max_epochs,
                 is_train,
                 norm,
                 task_Note='a',
                 **kwargs):
        '''
        env_name:   name of the gym environment
        NumOfExp:   number of experiences (stored as self._LenOfExp)
        model_type: choose model type for each task, choice: ('fc',)
        loss_type:  choose the form of the objective function
        dim_input:  input dimension
        dim_output: desired output dimension
        beta:       learning rate used for Adam Optimizer
        max_epochs: maximum number of training epochs
        is_train:   whether to create summary/checkpoint directories for training
        norm:       normalization method used in the model
        task_Note:  tag appended to the task name
        '''
        self._sess = utils.get_session(1)
        self._is_train = is_train
        #self._MAML_model = MAML_model

        self._norm = norm
        self._dim_input = dim_input
        self._dim_output = dim_output

        if env_name == 'HalfCheetahEnvDisableEnv-v0' or env_name == 'HalfCheetahVaryingEnv-v0':
            self._traj_cost = self._traj_hc_cost
        elif env_name == 'AntDisableEnv-v0':
            self._traj_cost = self._traj_ant_cost
        else:
            raise ValueError('traj cost is not defined for env: {}'.format(env_name))

        self.beta = beta
        self._avoid_second_derivative = False
        self._task_Note = task_Note
        self._task_name = 'Dynamics.{}_gym-EXP_{}'.format(
            env_name, self._task_Note)
        log.infov('Task name: {}'.format(self._task_name))

        self._logdir = kwargs['logdir']
        self._LenOfExp = NumOfExp

        # Build placeholder
        self._build_placeholder()
        # Build model
        model = self._import_model(model_type)
        self._construct_weights = model.construct_weights
        self._contruct_forward = model.construct_forward
        # Loss function

        self._loss_fn = self._get_loss_fn(loss_type)
        self._build_graph(dim_input, dim_output, norm=norm)
        # Misc
        self._summary_dir = os.path.join(self._logdir, 'log', self._task_name)
        self._checkpoint_dir = os.path.join(self._logdir, 'checkpoint',
                                            self._task_name)
        self._saver = tf.train.Saver(max_to_keep=10)
        if self._is_train:
            if not os.path.exists(self._summary_dir):
                os.makedirs(self._summary_dir)
            self._writer = tf.summary.FileWriter(self._summary_dir,
                                                 self._sess.graph)
            if not os.path.exists(self._checkpoint_dir):
                os.makedirs(self._checkpoint_dir)
        # Initialize all variables
        log.infov("Initialize all variables")
        self._sess.run(tf.global_variables_initializer())
        self.t_exp = 0
        self.max_epochs = max_epochs

        print('weight[w1] ', self._weights['w1'].eval(session=self._sess))

        if kwargs['restore_checkpoint'] is None:
            restore_checkpoint = tf.train.latest_checkpoint(
                kwargs['restore_dir'])
        else:
            restore_checkpoint = kwargs['restore_checkpoint']
        self._saver.restore(self._sess, restore_checkpoint)
        log.infov('Load model: {}'.format(restore_checkpoint))

        print('weight[w1] ', self._weights['w1'].eval(session=self._sess))
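Note that this constructor always restores a checkpoint at the end (the restore block is unconditional), so kwargs must supply logdir plus restore_checkpoint and/or restore_dir. An instantiation sketch; every concrete value below is an assumption:

# All dims, paths, and hyperparameters here are illustrative.
dyn_model = Dynamics('HalfCheetahVaryingEnv-v0',
                     NumOfExp=100,
                     model_type='fc',
                     loss_type='MSE',
                     dim_input=24,
                     dim_output=18,
                     beta=1e-3,
                     max_epochs=50,
                     is_train=True,
                     norm=None,
                     task_Note='a',
                     restore_checkpoint=None,
                     restore_dir='log/checkpoint/Dynamics.HalfCheetahVaryingEnv-v0_gym-EXP_a',
                     logdir='log')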
Code Example #8
File: MPC_main.py Project: Jerryxiaoyu/local_mpc
def main(args):
    tf.set_random_seed(args.seed)
    np.random.seed(args.seed)

    env_name = args.env_name  # HalfCheetah-v2  My3LineDirect-v1
    print(env_name)

    if args.env_name == 'HalfCheetahEnvDisableEnv-v0':
        cost_fn = cheetah_cost_fn
        sample_task_fun = np.random.randint
    elif args.env_name == 'HalfCheetahVaryingEnv-v0':
        cost_fn = cheetah_cost_fn
        sample_task_fun = np.random.uniform
    else:
        raise ValueError('unsupported env: {}'.format(args.env_name))

    env = gym.make(env_name)
    dim_input = env.observation_space.shape[0] + env.action_space.shape[0]
    dim_output = env.observation_space.shape[0]

    logdir = configure_log_dir(logname=env_name, txt=args.note)
    # save args parameters
    with open(logdir + '/info.txt', 'wt') as f:
        print('Hello World!\n', file=f)
        print(args, file=f)

    mpc_horizon = args.mpc_horizon
    num_simulated_paths = args.simulated_paths  #10000

    dyn_model = Dynamics(
        args.env_name,
        args.NumOfExp,
        args.model_type,
        args.loss_type,
        dim_input,
        dim_output,
        beta=args.beta,
        max_epochs=args.max_epochs,
        is_train=args.is_train,
        norm=args.norm,
        task_Note=args.note,
        restore_checkpoint=args.restore_checkpoint,
        restore_dir=args.restore_dir,
        logdir=logdir)

    mpc_controller = MPCcontroller(
        env=env,
        dyn_model=dyn_model,
        horizon=mpc_horizon,
        cost_fn=cost_fn,
        num_simulated_paths=num_simulated_paths,
    )
    logger = Logger(logdir, csvname='log')

    num_itr = args.num_itr
    experiences, costs = [], []
    print('MPC is beginning...')
    for itr in range(num_itr):
        reward, model_loss_mean = rollout(
            env,
            mpc_controller,
            task_goal=args.task_goal,
            dyn_model=dyn_model,
            experiences=experiences,
            NumOfExp=args.NumOfExp,
            horizon=args.horizon,
            cost_fn=cheetah_cost_fn,
            render=False,
            verbose=False,
            save_video=False,
            ignore_done=True,
        )

        #print(time.asctime( time.localtime(time.time()) ), ' itr :', itr, 'Average reward :' , cost)
        log.infov(
            "Itr {}/{} Accumulated Reward: {:.4f}  Model loss mean:{:.4f}".
            format(itr, num_itr, reward, model_loss_mean))

        logger.log({
            'itr': itr,
            'Accumulated Reward': reward,
            'Model loss mean': model_loss_mean,
        })

    print('MPC is over....')

    logger.write(display=False)
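MPCcontroller pairs the learned dynamics model with cost_fn over mpc_horizon steps and num_simulated_paths candidate action sequences. A common realization of that combination is random-shooting MPC, sketched below; this is an assumed illustration of the idea, not the project's controller code:

import numpy as np

def random_shooting_action(state, dynamics_fn, cost_fn, horizon, num_paths,
                           act_low, act_high, rng=None):
    """Pick the first action of the lowest-cost randomly sampled sequence."""
    rng = rng or np.random.default_rng()
    act_dim = len(act_low)
    actions = rng.uniform(act_low, act_high,
                          size=(num_paths, horizon, act_dim))
    states = np.repeat(state[None, :], num_paths, axis=0)
    costs = np.zeros(num_paths)
    for t in range(horizon):
        next_states = dynamics_fn(states, actions[:, t])  # batched one-step model
        costs += cost_fn(states, actions[:, t], next_states)
        states = next_states
    return actions[np.argmin(costs), 0]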