def __init__(self, dataset, model_type, loss_type, dim_input, dim_output,
             alpha, beta, K, batch_size, is_train, num_updates, norm):
    '''
    dataset: task dataset providing training/evaluation samples
    model_type: model type used for each task, choices: ('fc',)
    loss_type: form of the objective function
    dim_input: input dimension
    dim_output: desired output dimension
    alpha: fixed inner-loop learning rate used to compute the task gradient
    beta: outer-loop learning rate used by the Adam optimizer
    K: perform K-shot learning
    batch_size: number of tasks sampled in each iteration
    is_train: whether summary/checkpoint directories are created for training
    num_updates: number of inner-loop gradient updates per task
    norm: normalization scheme used by the network
    '''
    self._sess = utils.get_session(1)
    self._is_train = is_train
    self._dataset = dataset
    self._alpha = alpha
    self._K = K
    self._norm = norm
    self._dim_input = dim_input
    self._dim_output = dim_output
    self._batch_size = batch_size
    self._num_updates = num_updates
    self._meta_optimizer = tf.train.AdamOptimizer(beta)
    self._avoid_second_derivative = False
    self._task_name = 'MAML.{}_{}-shot_{}-updates_{}-batch_norm-{}'.format(
        dataset.name, self._K, self._num_updates, self._batch_size, self._norm)
    log.infov('Task name: {}'.format(self._task_name))

    # Build placeholders
    self._build_placeholder()
    # Build model
    model = self._import_model(model_type)
    self._construct_weights = model.construct_weights
    self._contruct_forward = model.construct_forward
    # Loss function
    self._loss_fn = self._get_loss_fn(loss_type)
    self._build_graph(dim_input, dim_output, norm=norm)

    # Misc: summary/checkpoint directories, saver, and writer
    self._summary_dir = os.path.join('log', self._task_name)
    self._checkpoint_dir = os.path.join('checkpoint', self._task_name)
    self._saver = tf.train.Saver(max_to_keep=10)
    if self._is_train:
        if not os.path.exists(self._summary_dir):
            os.makedirs(self._summary_dir)
        self._writer = tf.summary.FileWriter(self._summary_dir,
                                             self._sess.graph)
        if not os.path.exists(self._checkpoint_dir):
            os.makedirs(self._checkpoint_dir)

    # Initialize all variables
    log.infov("Initialize all variables")
    self._sess.run(tf.global_variables_initializer())
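# --- Illustrative sketch (not part of the original class) ---------------------
# A minimal, self-contained NumPy illustration of the update scheme the graph
# built above is assumed to implement: one inner gradient step on the K-shot
# support split with the fixed rate `alpha`, then an outer (meta) step on the
# query loss of the adapted weights. The linear model, squared loss, and plain
# gradient step standing in for the Adam optimizer are placeholders chosen only
# to keep the example runnable; they are not the repo's actual model.
import numpy as np


def maml_sketch(w, x_train, y_train, x_val, y_val, alpha=0.01, beta=0.001):
    # Inner update: adapt w on the K support points of one task.
    grad_train = 2 * x_train.T @ (x_train @ w - y_train) / len(x_train)
    w_adapted = w - alpha * grad_train
    # Outer update: score the adapted weights on the query points and move the
    # meta-parameters (first-order approximation of the meta-gradient).
    grad_val = 2 * x_val.T @ (x_val @ w_adapted - y_val) / len(x_val)
    return w - beta * grad_val

# Usage (shapes only): w (3, 1), x_train (K, 3), y_train (K, 1),
# x_val (Q, 3), y_val (Q, 1); returns the updated meta-parameters (3, 1).
# ------------------------------------------------------------------------------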
def learn(self, batch_size, dataset, max_steps):
    for step in range(int(max_steps)):
        meta_val_loss, meta_train_loss, summary_str = self._single_train_step(
            dataset, batch_size, step)
        # Log / TensorBoard / Save / Evaluate
        if step % SUMMARY_FREQ == 0:
            self._writer.add_summary(summary_str, step)
        if step % LOG_FREQ == 0:
            log.info(
                "Step: {}/{}, Meta train loss: {:.4f}, Meta val loss: {:.4f}"
                .format(step, int(max_steps), meta_train_loss, meta_val_loss))
        if step % SAVE_FREQ == 0:
            log.infov("Save checkpoint-{}".format(step))
            self._saver.save(self._sess,
                             os.path.join(self._checkpoint_dir, 'checkpoint'),
                             global_step=step)
        if step % EVAL_FREQ == 0:
            self.evaluate(dataset, 100, False)
def evaluate(self, dataset, test_steps, draw, **kwargs):
    if not self._is_train:
        assert kwargs['restore_checkpoint'] is not None or \
            kwargs['restore_dir'] is not None
        if kwargs['restore_checkpoint'] is None:
            restore_checkpoint = tf.train.latest_checkpoint(
                kwargs['restore_dir'])
        else:
            restore_checkpoint = kwargs['restore_checkpoint']
        self._saver.restore(self._sess, restore_checkpoint)
        log.infov('Load model: {}'.format(restore_checkpoint))

    if draw:
        draw_dir = os.path.join('vis', self._task_name)
        if not os.path.exists(draw_dir):
            os.makedirs(draw_dir)

    accumulated_val_loss = []
    accumulated_train_loss = []
    for step in tqdm(range(test_steps)):
        output, val_loss, train_loss, amplitude, phase, inp = \
            self._single_test_step(dataset, 1)
        if not self._is_train and draw:
            # Visualize one task at a time
            for am, ph in zip(amplitude, phase):
                dataset.visualize(am, ph, inp[:, self._K:, :], output,
                                  path=os.path.join(draw_dir,
                                                    '{}.png'.format(step)))
        accumulated_val_loss.append(val_loss)
        accumulated_train_loss.append(train_loss)

    val_loss_mean = sum(accumulated_val_loss) / test_steps
    train_loss_mean = sum(accumulated_train_loss) / test_steps
    log.infov(
        "[Evaluate] Meta train loss: {:.4f}, Meta val loss: {:.4f}".format(
            train_loss_mean, val_loss_mean))
def evaluate2(self, dataset, test_steps, draw, load_model=False,
              task_range=(0, 7), **kwargs):
    '''
    Evaluate the meta-model over a sweep of tasks and record per-task losses.
    :param dataset: task dataset used for evaluation
    :param test_steps: number of tasks evaluated per repetition
    :param draw: whether to visualize predictions for each task
    :param load_model: restore a checkpoint before evaluating
    :param task_range: (low, high) range tasks are sampled from
    :param kwargs: restore_checkpoint / restore_dir, used when load_model is True
    :return: None; per-task validation losses are written to a csv
    '''
    if load_model:
        assert kwargs['restore_checkpoint'] is not None or \
            kwargs['restore_dir'] is not None
        if kwargs['restore_checkpoint'] is None:
            restore_checkpoint = tf.train.latest_checkpoint(
                kwargs['restore_dir'])
        else:
            restore_checkpoint = kwargs['restore_checkpoint']
        self._saver.restore(self._sess, restore_checkpoint)
        log.infov('Load model: {}'.format(restore_checkpoint))

    for tm in range(5):
        accumulated_val_loss = []
        accumulated_train_loss = []
        tasks = []
        for step in tqdm(range(test_steps)):
            # task = self._sample_task_fun(task_range[0], task_range[1], (1,))
            task = np.array([0 + step * 0.05])
            output, val_loss, train_loss, x, y = self._single_test_step(
                dataset, 1, task=task)
            if load_model and draw:
                # Visualize one task at a time
                draw_dir = os.path.join(
                    self._logdir, 'vis', self._task_name,
                    'exp_' + str(step) + '_task_num' + str(task[0]) +
                    '_loss' + str(val_loss))
                if not os.path.exists(draw_dir):
                    os.makedirs(draw_dir)
                dataset.visualize(x[:, self._K:, :], y[:, self._K:, :],
                                  output, draw_dir)
            accumulated_val_loss.append(val_loss)
            accumulated_train_loss.append(train_loss)
            tasks.append(task)

        val_loss_mean = sum(accumulated_val_loss) / test_steps
        train_loss_mean = sum(accumulated_train_loss) / test_steps
        log.infov(
            "[Evaluate] Meta train loss: {:.4f}, Meta val loss: {:.4f}".format(
                train_loss_mean, val_loss_mean))

        # Stack the validation losses of each repetition as a new column
        data_tmp = np.array(accumulated_val_loss).reshape((-1, 1))
        if tm == 0:
            data2 = data_tmp
        else:
            data2 = np.concatenate((data2, data_tmp), axis=1)

    log_dir = os.path.join('vis', self._task_name)
    logger = Logger(log_dir, csvname='log')
    data1 = np.array(tasks)
    data = np.concatenate((data1, data2), axis=1)
    logger.log_table2csv(data)
def evaluate(self, dataset, test_steps, draw, load_model=False,
             task_range=(0, 7), task_type='rand', **kwargs):
    if load_model:
        assert kwargs['restore_checkpoint'] is not None or \
            kwargs['restore_dir'] is not None
        if kwargs['restore_checkpoint'] is None:
            restore_checkpoint = tf.train.latest_checkpoint(
                kwargs['restore_dir'])
        else:
            restore_checkpoint = kwargs['restore_checkpoint']
        self._saver.restore(self._sess, restore_checkpoint)
        log.infov('Load model: {}'.format(restore_checkpoint))

    tasks = []
    accumulated_val_loss, accumulated_val_loss_post = [], []
    accumulated_train_loss = []
    for step in tqdm(range(test_steps)):
        if task_type == 'rand':
            task = self._sample_task_fun(task_range[0], task_range[1], (1,))
        elif task_type == 'lin':
            task = np.array([0 + step * 0.05])
        else:
            raise ValueError('Unknown task_type: {}'.format(task_type))
        output, val_loss, train_loss, x, y = self._single_test_step(
            dataset, num_tasks=1, task=task)
        if load_model and draw:
            # Visualize one task at a time
            draw_dir = os.path.join(
                self._logdir, 'vis', self._task_name,
                'exp_' + str(step) + '_task_num' + str(task[0]) +
                '_loss' + str(val_loss))
            if not os.path.exists(draw_dir):
                os.makedirs(draw_dir)
            dataset.visualize(x[:, self._K:, :], y[:, self._K:, :],
                              output, draw_dir)
        accumulated_val_loss.append(val_loss)
        accumulated_train_loss.append(train_loss)
        tasks.append(task)

    val_loss_mean = sum(accumulated_val_loss) / test_steps
    train_loss_mean = sum(accumulated_train_loss) / test_steps
    log.infov(
        "[Evaluate] Meta train loss: {:.4f}, Meta val loss: {:.4f}".format(
            train_loss_mean, val_loss_mean))

    if load_model:
        logger = Logger(self._logdir, csvname='log_test_loss')
        for i in range(test_steps):
            logger.log({
                'num': i,
                'task': tasks[i][0],
                'val_loss': accumulated_val_loss[i],
                'train_loss': accumulated_train_loss[i],
            })
            logger.write(display=False)
        logger.close()
    return train_loss_mean, val_loss_mean
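# --- Illustrative sketch (not part of the original class) ---------------------
# How the two `task_type` modes above generate evaluation tasks, assuming
# `self._sample_task_fun` is np.random.uniform (as wired up in main() for the
# varying-environment case). 'rand' draws one task uniformly from `task_range`,
# while 'lin' sweeps a fixed 0.05-spaced grid so successive steps cover the
# task space evenly. `sample_eval_task` is a hypothetical standalone helper.
import numpy as np


def sample_eval_task(step, task_type='rand', task_range=(0, 7),
                     sample_task_fun=np.random.uniform):
    if task_type == 'rand':
        return sample_task_fun(task_range[0], task_range[1], (1,))
    elif task_type == 'lin':
        return np.array([step * 0.05])
    raise ValueError('Unknown task_type: {}'.format(task_type))

# e.g. sample_eval_task(3, 'lin') -> array([0.15]);
#      sample_eval_task(0, 'rand') -> one uniform draw from [0, 7).
# ------------------------------------------------------------------------------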
def learn(self, batch_size, dataset, max_epochs, is_PreTrain=False, **kwargs):
    # Collect data
    dataset.get_dataset(resample=True,
                        task=None,
                        controller='Rand',
                        task_range=self._task_range,
                        task_fun=self._sample_task_fun)
    data_x, data_y, LengthOfData = dataset.get_batch(batch_size)

    # Load data into a tf.data pipeline
    data_x_placeholder = tf.placeholder(tf.float32, data_x.shape)
    data_y_placeholder = tf.placeholder(tf.float32, data_y.shape)
    dataset_tf = tf.data.Dataset.from_tensor_slices(
        (data_x_placeholder, data_y_placeholder)).shuffle(
            buffer_size=10000).batch(batch_size).repeat()

    # Create the data iterator
    iterator = dataset_tf.make_initializable_iterator()  # dataset.make_one_shot_iterator()
    train_loader = iterator.get_next()

    # Initialize the iterator with the collected data
    self._sess.run(iterator.initializer,
                   feed_dict={
                       data_x_placeholder: data_x,
                       data_y_placeholder: data_y
                   })

    logger = Logger(self._logdir, csvname='log_loss')

    if is_PreTrain:
        input_tensors = [
            self._pretrain_op, self._summary_op, self._meta_val_loss,
            self._meta_train_loss, self.total_loss1, self.total_losses2
        ]
    else:
        input_tensors = [
            self._meta_train_op, self._summary_op, self._meta_val_loss,
            self._meta_train_loss, self.total_loss1, self.total_losses2
        ]

    for epoch in range(max_epochs):
        for i in range(int(LengthOfData / batch_size)):
            batch_input, batch_target = self._sess.run(train_loader)
            # First K samples of each task adapt the model (meta-train);
            # the remaining samples score the adapted model (meta-val).
            feed_dict = {
                self._meta_train_x: batch_input[:, :self._K, :],
                self._meta_train_y: batch_target[:, :self._K, :],
                self._meta_val_x: batch_input[:, self._K:, :],
                self._meta_val_y: batch_target[:, self._K:, :]
            }
            _, summary_str, meta_val_loss, meta_train_loss, meta_loss1, meta_losses2 = \
                self._sess.run(input_tensors, feed_dict)
            if i % LOG_FREQ == 0:
                log.info(
                    "Epoch: {}/{} Step: {}/{}, Meta train loss: {:.4f}, Meta val loss: {:.4f}"
                    .format(epoch, max_epochs, i,
                            int(LengthOfData / batch_size),
                            meta_train_loss, meta_val_loss))

        # Log / TensorBoard / Save / Evaluate
        if epoch % SUMMARY_FREQ == 0:
            self._writer.add_summary(summary_str, epoch)
        # if epoch % LOG_FREQ == 0:
        #     log.info("Step: {}/{}, Meta train loss: {:.4f}, Meta val loss: {:.4f}".format(
        #         epoch, int(max_epochs), meta_train_loss, meta_val_loss))
        if (epoch + 1) % SAVE_FREQ == 0:
            log.infov("Save checkpoint-{}".format(epoch))
            self._saver.save(self._sess,
                             os.path.join(self._checkpoint_dir, 'checkpoint'),
                             global_step=epoch)
        if (epoch + 1) % EVAL_FREQ == 0:
            train_loss_mean, val_loss_mean = self.evaluate(
                dataset, 2, False, task_range=self._task_range)
            logger.log({
                'epoch': epoch,
                'meta_val_loss': meta_val_loss,
                'meta_train_loss': meta_train_loss,
                'meta_loss1': meta_loss1,
                'meta_loss2': meta_losses2,
                'val_train_loss_mean': train_loss_mean,
                'val_val_loss_mean': val_loss_mean,
            })
            logger.write(display=False)

    # Close the logger
    logger.close()
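# --- Illustrative sketch (not part of the original class) ---------------------
# A minimal NumPy version of the K-shot split used in the feed_dict above: the
# first K samples of every task form the support (meta-train) set, the rest
# form the query (meta-val) set. The shapes below (tasks, samples_per_task,
# feature_dim) are hypothetical and only illustrate the slicing.
import numpy as np


def k_shot_split(batch_input, batch_target, K):
    support = (batch_input[:, :K, :], batch_target[:, :K, :])
    query = (batch_input[:, K:, :], batch_target[:, K:, :])
    return support, query

# Usage:
#   x = np.zeros((4, 15, 23))   # 4 tasks, 15 samples each, 23-dim inputs
#   y = np.zeros((4, 15, 17))   # 17-dim targets
#   (sx, sy), (qx, qy) = k_shot_split(x, y, K=5)
#   sx.shape -> (4, 5, 23); qx.shape -> (4, 10, 23)
# ------------------------------------------------------------------------------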
def __init__(self, env_name, NumOfExp, model_type, loss_type, dim_input,
             dim_output, beta, max_epochs, is_train, norm, task_Note='a',
             **kwargs):
    '''
    env_name: gym environment the dynamics model is trained on
    NumOfExp: number of experiences kept for adaptation (stored as self._LenOfExp)
    model_type: model type used for each task, choices: ('fc',)
    loss_type: form of the objective function
    dim_input: input dimension (observation plus action)
    dim_output: desired output dimension (next observation)
    beta: learning rate used by the Adam optimizer
    max_epochs: maximum number of training epochs
    is_train: whether summary/checkpoint directories are created for training
    norm: normalization scheme used by the network
    task_Note: suffix appended to the task name for logging
    kwargs: logdir, restore_checkpoint, restore_dir
    '''
    self._sess = utils.get_session(1)
    self._is_train = is_train
    # self._MAML_model = MAML_model
    self._norm = norm
    self._dim_input = dim_input
    self._dim_output = dim_output
    if env_name == 'HalfCheetahEnvDisableEnv-v0' or env_name == 'HalfCheetahVaryingEnv-v0':
        self._traj_cost = self._traj_hc_cost
    elif env_name == 'AntDisableEnv-v0':
        self._traj_cost = self._traj_ant_cost
    else:
        raise ValueError(
            'No trajectory cost defined for env: {}'.format(env_name))
    self.beta = beta
    self._avoid_second_derivative = False
    self._task_Note = task_Note
    self._task_name = 'Dynamics.{}_gym-EXP_{}'.format(env_name,
                                                      self._task_Note)
    log.infov('Task name: {}'.format(self._task_name))
    self._logdir = kwargs['logdir']
    self._LenOfExp = NumOfExp

    # Build placeholders
    self._build_placeholder()
    # Build model
    model = self._import_model(model_type)
    self._construct_weights = model.construct_weights
    self._contruct_forward = model.construct_forward
    # Loss function
    self._loss_fn = self._get_loss_fn(loss_type)
    self._build_graph(dim_input, dim_output, norm=norm)

    # Misc: summary/checkpoint directories, saver, and writer
    self._summary_dir = os.path.join(self._logdir, 'log', self._task_name)
    self._checkpoint_dir = os.path.join(self._logdir, 'checkpoint',
                                        self._task_name)
    self._saver = tf.train.Saver(max_to_keep=10)
    if self._is_train:
        if not os.path.exists(self._summary_dir):
            os.makedirs(self._summary_dir)
        self._writer = tf.summary.FileWriter(self._summary_dir,
                                             self._sess.graph)
        if not os.path.exists(self._checkpoint_dir):
            os.makedirs(self._checkpoint_dir)

    # Initialize all variables
    log.infov("Initialize all variables")
    self._sess.run(tf.global_variables_initializer())

    self.t_exp = 0
    self.max_epochs = max_epochs

    # Restore the meta-trained weights; print w1 before and after to confirm
    # the restore actually changed them.
    print('weight[w1] ', self._weights['w1'].eval(session=self._sess))
    if kwargs['restore_checkpoint'] is None:
        restore_checkpoint = tf.train.latest_checkpoint(
            kwargs['restore_dir'])
    else:
        restore_checkpoint = kwargs['restore_checkpoint']
    self._saver.restore(self._sess, restore_checkpoint)
    log.infov('Load model: {}'.format(restore_checkpoint))
    print('weight[w1] ', self._weights['w1'].eval(session=self._sess))
def main(args):
    tf.set_random_seed(args.seed)
    np.random.seed(args.seed)

    env_name = args.env_name  # e.g. HalfCheetah-v2, My3LineDirect-v1
    print(env_name)
    if args.env_name == 'HalfCheetahEnvDisableEnv-v0':
        cost_fn = cheetah_cost_fn
        sample_task_fun = np.random.randint
    elif args.env_name == 'HalfCheetahVaryingEnv-v0':
        cost_fn = cheetah_cost_fn
        sample_task_fun = np.random.uniform
    else:
        raise ValueError('Unsupported env: {}'.format(args.env_name))

    env = gym.make(env_name)
    dim_input = env.observation_space.shape[0] + env.action_space.shape[0]
    dim_output = env.observation_space.shape[0]

    logdir = configure_log_dir(logname=env_name, txt=args.note)
    # Save the run arguments
    with open(logdir + '/info.txt', 'wt') as f:
        print('Hello World!\n', file=f)
        print(args, file=f)

    mpc_horizon = args.mpc_horizon
    num_simulated_paths = args.simulated_paths  # e.g. 10000

    dyn_model = Dynamics(args.env_name,
                         args.NumOfExp,
                         args.model_type,
                         args.loss_type,
                         dim_input,
                         dim_output,
                         beta=args.beta,
                         max_epochs=args.max_epochs,
                         is_train=args.is_train,
                         norm=args.norm,
                         task_Note=args.note,
                         restore_checkpoint=args.restore_checkpoint,
                         restore_dir=args.restore_dir,
                         logdir=logdir)

    mpc_controller = MPCcontroller(env=env,
                                   dyn_model=dyn_model,
                                   horizon=mpc_horizon,
                                   cost_fn=cost_fn,
                                   num_simulated_paths=num_simulated_paths)

    logger = Logger(logdir, csvname='log')
    num_itr = args.num_itr
    experiences, costs = [], []
    print('MPC is beginning...')
    for itr in range(num_itr):
        reward, model_loss_mean = rollout(
            env,
            mpc_controller,
            task_goal=args.task_goal,
            dyn_model=dyn_model,
            experiences=experiences,
            NumOfExp=args.NumOfExp,
            horizon=args.horizon,
            cost_fn=cheetah_cost_fn,
            render=False,
            verbose=False,
            save_video=False,
            ignore_done=True,
        )
        log.infov(
            "Itr {}/{} Accumulated Reward: {:.4f} Model loss mean: {:.4f}".format(
                itr, num_itr, reward, model_loss_mean))
        logger.log({
            'itr': itr,
            'Accumulated Reward': reward,
            'Model loss mean': model_loss_mean,
        })
    print('MPC is over.')
    logger.write(display=False)
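# --- Illustrative sketch (not part of the original script) --------------------
# What `mpc_horizon` and `num_simulated_paths` control, assuming MPCcontroller
# follows the usual random-shooting scheme: sample many candidate action
# sequences, roll each out through the learned dynamics, score them with the
# cost function, and execute only the first action of the cheapest sequence.
# `random_shooting_action` and its toy dynamics/cost are placeholders, not the
# repo's MPCcontroller implementation.
import numpy as np


def random_shooting_action(state, dynamics_fn, cost_fn, horizon=10,
                           num_paths=1000, action_dim=6, rng=None):
    rng = rng or np.random.RandomState(0)
    # Candidate action sequences: (num_paths, horizon, action_dim)
    actions = rng.uniform(-1, 1, size=(num_paths, horizon, action_dim))
    states = np.repeat(state[None, :], num_paths, axis=0)
    total_cost = np.zeros(num_paths)
    for t in range(horizon):
        next_states = dynamics_fn(states, actions[:, t])
        total_cost += cost_fn(states, actions[:, t], next_states)
        states = next_states
    return actions[np.argmin(total_cost), 0]  # first action of the best path

# Usage with toy placeholders:
#   dyn = lambda s, a: s + 0.1 * a[:, :s.shape[1]]
#   cost = lambda s, a, ns: np.sum(ns ** 2, axis=1)
#   a0 = random_shooting_action(np.zeros(6), dyn, cost)
# ------------------------------------------------------------------------------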