def initializeGPs(self):
    set_random_seeds(self.ini_seeds)
    x_ninit = self.x_ninit
    x = LatinDesign(self.xbounds, x_ninit)
    self.logger.debug('init x shape={}'.format(x.shape))
    self.logger.debug('init x={}'.format(x))
    # evaluate the initial designs at the target (highest) fidelity
    z = np.tile(self.zbounds[1, :], (x.shape[0], 1))
    y = self.evaluate(x,
                      z,
                      resources=self.mff.getCost(z, mode="c"),
                      mffname='mff')
    self.batch_idx += 1
    self.logger.debug('y_init:{}'.format(y))
    self.logger.debug("budget used after initialization:{}".format(
        self.budget_current))
    self.logger.debug(
        "budget used after initialization(from mff):{}".format(
            self.mff.currentBudget))
    self.logger.debug('y_init shape={}'.format(y.shape[0]))
    # init a surrogate
    self.gps = model_list[self.config['surrogate_name']](
        X_init=x, Y_init=y, config=self.config['GPRS'])
    self.logger.debug(
        '********************check the init surrogate={}'.format(
            self.gps.model))
    self.gps.update()
    self.verbose()
    self.logger.debug("end model initialization")
    self.iterations += 1
    self.record(x, y, z)
def optimize(self): self.logger.debug("start optimization") if self.iterations == 0: self.initializeGPs() self._configure_acq() set_random_seeds(self.config['seeds']) while self.mff.currentBudget < self.budget: if self.checkUpdateGPs(): self.gps.update() self.verbose() self.iterations += 1 if self.iterations % self.warping_update_interval == 0: self.gps.unfix_warping_functions() else: self.gps.fix_warping_functions() xz = self.optimize_acq_f(n_iter=self.config['optniter'], method=self.method) self.logger.debug("new next to evaluate x = {}, z={}".format( xz[:, :-self.zdim], xz[:, -self.zdim:])) y = self.evaluate(xz[:, :-self.zdim], xz[:, -self.zdim:], resources=self.mff.getCost(xz[:, -self.zdim:], mode="c"), mffname='mff') self.logger.debug("new evaluate y = {}".format(y)) self.record(xz[:, :-self.zdim], xz[:, -self.zdim:], y) self.gps.add_data(xz, y) self.batch_idx += 1
def optimize(self): self.logger.debug("start optimization") if self.iterations == 0: self.initializeGPs() self._configure_acq() set_random_seeds(self.config['seeds']) while self.mff.currentBudget < self.budget: if self.checkUpdateGps(): self.gps.update() self.verbose() # produce a batch of K designs x = [] k = 0 gps_temp = deepcopy(self.gps) while k < self.K: x_k = self.optimize_acq_f(n_iter=self.config['optniter'], method=self.method) x.append(x_k) fantasies, _ = self.gps.predict(x_k) self.gps.add_data(x_k, fantasies) self.gps.update() k += 1 self.gps = deepcopy(gps_temp) if self.acq_name == 'ES' or self.acq_name == 'MES': self.acquisition.gps = self.gps self.acquisition._config_proposal_function() x = np.concatenate(x) self.logger.debug('new next to evaluate batch x={}'.format(x)) z = np.tile(self.zbounds[1, :], (x.shape[0], 1)) y = self.evaluate(x, z, resources=self.mff.getCost(z, mode='c'), mffname='mff') self.logger.debug('new evaluate batch y={}'.format(y)) for idx in range(x.shape[0]): self.iterations += 1 self.record(x[idx:idx + 1, :], y[idx:idx + 1, :], z[idx:idx + 1, :]) self.gps.add_data(x, y) self.batch_idx += 1
def optimize(self):
    self.logger.debug('start optimization')
    if self.iterations == 0:
        self.initialize()
    set_random_seeds(self.config['seeds'])
    while self.mff.currentBudget < self.budget:
        self.iterations += 1
        # draw a uniformly random design with a time-based seed so that
        # repeated runs do not reuse the same pseudo-random sequence,
        # then restore the configured seed
        np.random.seed(int(time.time() * 10**8 - int(time.time()) * 10**8))
        x = np.random.uniform(low=self.xbounds[0, :],
                              high=self.xbounds[1, :],
                              size=(1, self.xdim))
        np.random.seed(self.seeds)
        self.logger.debug('new next to evaluate x={}'.format(x))
        z = np.tile(self.zbounds[1, :], (x.shape[0], 1))
        y = self.evaluate(x, z, resources=self.mff.getCost(z, mode='c'))
        self.logger.debug("new evaluate y={}".format(y))
        self.record(x, y, z)
        self.batch_idx += 1
def initialize(self):
    set_random_seeds(self.ini_seeds)
    x_ninit = self.x_ninit
    x = LatinDesign(self.xbounds, x_ninit)
    self.logger.debug('init x shape={}'.format(x.shape))
    self.logger.debug('init x={}'.format(x))
    z = np.tile(self.zbounds[1, :], (x.shape[0], 1))
    y = self.mff.eval(x, z, mode="c").reshape((-1, 1))
    self.batch_idx += 1
    self.logger.debug('y_init:{}'.format(y))
    self.logger.debug('budget used after initialization:{}'.format(
        self.budget_current))
    self.logger.debug(
        'budget used after initialization(from mff):{}'.format(
            self.mff.currentBudget))
    self.logger.debug('y_init shape={}'.format(y.shape[0]))
    self.logger.debug('end initialization')
    self.iterations += 1
    self.record(x, y, z)
def optimize(self): self.logger.debug("start optimization") if self.iterations == 0: self.initializeGPs() self._configure_acq() set_random_seeds(self.config['seeds']) while self.mff.currentBudget < self.budget: if self.checkUpdateGPs(): self.gps.update() self.verbose() self.iterations += 1 x = self.optimize_acq_f(n_iter=self.config['optniter'], method=self.method) self.logger.debug('new next to evaluate x={}'.format(x)) z = np.tile(self.zbounds[1, :], (x.shape[0], 1)) y = self.evaluate(x, z, resources=self.mff.getCost(z, mode="c"), mffname='mff') self.logger.debug("new evaluate y= {}".format(y)) self.record(x, y, z) self.gps.add_data(x, y) self.batch_idx += 1
def main_eval(args):
    eval_c_file = os.path.join(args.exp_dir, 'config.json')
    eval_a_file = os.path.join(args.exp_dir, 'args.json')
    eval_p_file = os.path.join(args.exp_dir, 'model', args.model_dir,
                               'checkpoint_' + args.model_name + '.tar')
    # Read args from saved args file
    with open(eval_a_file) as f:
        eval_args = json.load(f)
    # Read configs from saved config file
    default_config = default_cfgs.config_dict[eval_args['m']]
    with open(eval_c_file) as f:
        eval_config = json.load(f)
    config = deep_update_dict(eval_config, default_config)
    set_random_seeds(eval_args['seeds'])
    file_str = create_save_path(
        eval_args, ['seeds'], 'eval',
        'exp_dir=' + args.method + args.exp_dir.replace('/', '-'))
    config['data_folder_experiment'] = file_str

    # Create experiment folder
    if not os.path.exists(file_str):
        os.makedirs(file_str)

    # Configure and create logging
    logger = create_logging('output.log', file_str, 'w', args.debug)
    logger.info('Current config content is :{}'.format(config))
    writer = SummaryWriter(log_dir=file_str)

    env = CLEnv(select_environment(config['env']['env_name'])(config, 6))
    env.set_new_design(args.design)

    # Store args
    save_args2json('args.json', file_str, args)

    total_steps = config["op_algorithm_config"]["time_limits"] + 49
    env.set_task_t(total_steps)
    s_norm, actor, critic = load_model(eval_p_file)

    acc_step = 0
    acc_rwd = 0
    for i in range(args.ei):
        if args.save_video:
            video_path = os.path.join(file_str, 'videos',
                                      str(i) + '-th-iteration')
            os.makedirs(video_path, exist_ok=True)
        obs, info = env.reset(return_info=True)
        obs = torch.Tensor(obs).float()
        done = False
        ite_step = 0
        ite_rwd = 0
        vis_info = {}
        while True:
            vis_info['design'] = env.get_current_design()
            vis_info['steps'] = ite_step
            for vis in vis_checklist:
                if vis in info:
                    vis_info[vis] = info[vis]
            if args.save_video:
                rgb = env.render('rgb_array')
                rgb = Image.fromarray(rgb)
                traj = Image.new('RGB', (rgb.size[0] + 130, rgb.size[1]),
                                 (255, 255, 255))
                if 'pose(m)' in info:
                    x, y, z = info['pose(m)']
                    x = 8 * x
                    y = 8 * y
                    vis_info.setdefault("traj", []).append(
                        (x + 10, traj.size[1] - (y + traj.size[1] / 3)))
                elif 'pos' in info:
                    x, y, z = info['pos']
                    x = 8 * x
                    y = 8 * y
                    vis_info.setdefault("traj", []).append(
                        (x + 10, traj.size[1] - (y + traj.size[1] / 3)))
                all = Image.new('RGB',
                                (traj.size[0] + rgb.size[0], rgb.size[1]))
                draw_rgb = ImageDraw.Draw(rgb)
                draw_traj = ImageDraw.Draw(traj)
                add_text(draw_traj, vis_info, ite_step, total_steps)
                if 'traj' in vis_info:
                    draw_traj.point(vis_info['traj'], fill='red')
                all.paste(rgb, (0, 0))
                all.paste(traj, (rgb.size[0], 0))
                all.save(
                    os.path.join(video_path, '{:05d}.png'.format(ite_step)))
            obst = torch.FloatTensor(obs)
            s_norm.record(obst)
            obst_norm = s_norm(obst)
            with torch.no_grad():
                ac = actor.act_deterministic(obst_norm)
                ac = ac.cpu().numpy()
            if done:
                break
            obs, rwd, done, info = env.step(ac)
            time.sleep(0.0165)
            ite_step += 1
            ite_rwd += rwd
            info['rewards'] = ite_rwd
            info['ave_rewards'] = ite_rwd / ite_step
            acc_rwd += rwd
            acc_step += 1
        logger.info(
            'return of {}-th iteration is {}, and num of steps is {}, '
            'return per step={}'.format(i, ite_rwd, ite_step, ite_rwd / 250))
        if args.save_video:
            sp.call([
                'ffmpeg', '-loglevel', 'panic', '-r', '60', '-f', 'image2',
                '-i', os.path.join(video_path, '%05d.png'), '-vcodec',
                'libx264', '-pix_fmt', 'yuv420p',
                os.path.join(
                    video_path,
                    str(i) + '-th-ite_rwd=' + str(int(ite_rwd)) + '.mp4')
            ])
    avg_step = acc_step / args.ei
    avg_rwd = acc_rwd / args.ei
    logger.info(
        'average return of {} iterations is {}, and num of steps is {}, '
        'average return per step={}'.format(args.ei, avg_rwd, avg_step,
                                            avg_rwd / avg_step))
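
# Hypothetical entry point for main_eval. This is only a sketch: the flag
# names mirror the attributes read from `args` inside main_eval (exp_dir,
# model_dir, model_name, method, design, ei, save_video, debug), but their
# types and defaults are assumptions, not the repository's actual CLI.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='Evaluate a saved checkpoint')
    parser.add_argument('--exp_dir', type=str, required=True,
                        help='experiment folder with config.json and args.json')
    parser.add_argument('--model_dir', type=str, default='')
    parser.add_argument('--model_name', type=str, default='best')
    parser.add_argument('--method', type=str, default='')
    parser.add_argument('--design', type=float, nargs='+', default=None,
                        help='design vector passed to env.set_new_design')
    parser.add_argument('--ei', type=int, default=5,
                        help='number of evaluation iterations')
    parser.add_argument('--save_video', action='store_true')
    parser.add_argument('--debug', action='store_true')
    main_eval(parser.parse_args())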
def initializeGPs(self):
    set_random_seeds(self.ini_seeds)
    x_ninit = self.x_ninit
    x = LatinDesign(self.xbounds, x_ninit)
    self.logger.debug('init x shape={}'.format(x.shape))
    self.logger.debug('init x={}'.format(x))
    if self.config['ini_manner'] == 'latin_cross':
        z_ninit = self.z_ninit
        z = LatinDesign(self.zbounds, z_ninit)
        z = self._roundfidelity(z)
        # keep only fidelity settings below the target fidelity
        z = z[np.where(z.sum(axis=1) != sum(self.zbounds[1, :]))[0], :]
        self.logger.debug('init z shape={}'.format(z.shape))
        self.logger.debug('init z={}'.format(z))
        z_target = np.concatenate(
            [self.zbounds[1:, :] for i in range(x.shape[0])])
        x_ = np.random.permutation(
            LatinDesign(self.xbounds,
                        min(50, self.z_ninit)))[:z.shape[0], :]
        x = np.concatenate([x, x_])
        z = np.concatenate([z_target, z])
        self.logger.debug('final ini z={}'.format(z))
        self.logger.debug('final ini x={}'.format(x))
    else:
        raise NotImplementedError
    xz = np.concatenate((x, z), axis=1)
    self.xz_ninit = xz.shape[0]
    self.logger.debug("xz_ini cost={}".format(
        sum(self.mff.getCost(xz[:, -self.zdim:]))))
    self.logger.debug("xz_init shape={}".format(xz.shape))
    self.logger.debug("xz_init={}".format(xz))
    y = self.evaluate(xz[:, :-self.zdim],
                      xz[:, -self.zdim:],
                      resources=self.mff.getCost(xz[:, -self.zdim:],
                                                 mode="c"),
                      mffname='mff')
    self.batch_idx += 1
    self.logger.info("y_init:{}".format(y))
    self.logger.info("budget used after initialization:{}".format(
        self.budget_current))
    self.logger.info(
        "budget used after initialization(from mff):{}".format(
            self.mff.currentBudget))
    self.logger.debug("y_init shape={}".format(y.shape[0]))
    # init a surrogate
    self.gps = model_list[self.config['surrogate_name']](
        X_init=xz,
        Y_init=y,
        z_min=self.zbounds[0, :],
        z_max=self.zbounds[1, :],
        config=self.config['GPRS'])
    self.logger.debug('**************Check the init surrogate={}'.format(
        self.gps.model))
    self.gps.update()
    self.gps.fix_warping_functions()
    self.verbose()
    self.logger.debug("end model initialization")
    self.iterations += 1
    self.record(xz[:, :-self.zdim], xz[:, -self.zdim:], y)
def main(args):
    # Read config from config files
    default_config = default_cfgs.config_dict[args.m]
    if args.config:
        new_config_file = os.path.join('./nfwbo/configs', args.robot,
                                       args.config + '.json')
        with open(new_config_file) as f:
            new_config = json.load(f)
        config = deep_update_dict(new_config, default_config)
    else:
        config = default_config

    # Check possible bugs in config settings
    check_config(config, args)
    set_random_seeds(args.seeds)

    # Override config values with command-line arguments
    config["op_algorithm_config"]["seeds"] = args.seeds
    if args.dev:
        config["op_algorithm_config"]["max_num_epochs_per_design"] = 1
        config["op_algorithm_config"]["time_limits"] = 1
        config["op_algorithm_config"]["x_ninit"] = 1
        config["op_algorithm_config"]["z_ninit"] = 1
        config["op_algorithm_config"]["optniter"] = 1
        config["op_algorithm_config"]["optninit"] = 1
        config["op_algorithm_config"]["batch_size"] = 2
        config['rl_algorithm_config']["algo_params"]["checkpoint_batch"] = -1
    config['force_manual_design'] = args.h

    file_str = create_save_path(args, ['seeds']) + '_' + args.extra
    if args.h:
        file_str = file_str + '_human_design'
        config['rl_algorithm_config']["algo_params"]["checkpoint_batch"] = 240
        config['op_algorithm_config']["set_mff_inference"] = False
    config['data_folder_experiment'] = file_str
    plt_dir = os.path.join(file_str, 'plt')
    config['op_algorithm_config']['plt_dir'] = plt_dir
    model_path = file_str + "/model"
    config["rl_algorithm_config"]["algo_params"]["save_dir"] = model_path

    # Create experiment folder
    if not os.path.exists(file_str):
        os.makedirs(file_str)
    # Create model's folder
    if not os.path.exists(model_path):
        os.makedirs(model_path)
    if not os.path.exists(plt_dir):
        os.makedirs(plt_dir)

    # Store config
    with open(os.path.join(file_str, 'config.json'), 'w') as fd:
        fd.write(json.dumps(config, indent=2))
    # Store args
    save_args2json('args.json', file_str, args)

    # Configure and create logging
    logger = create_logging('output.log', file_str, 'w', args.debug)
    logger.info('Current config content is :{}'.format(config))
    writer = SummaryWriter(log_dir=file_str)

    do = DesignOptimize(logger, writer, config)
    do.optimize()
    writer.close()
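
# Hypothetical entry point for main. A minimal sketch only: the flag names
# mirror the attributes read from `args` above (m, config, robot, seeds, dev,
# h, extra, debug), but the defaults and help strings are assumptions rather
# than the repository's actual CLI definition.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='Run design optimization')
    parser.add_argument('--m', type=str, required=True,
                        help='key into default_cfgs.config_dict')
    parser.add_argument('--robot', type=str, default='')
    parser.add_argument('--config', type=str, default='',
                        help='config name under ./nfwbo/configs/<robot>/')
    parser.add_argument('--seeds', type=int, default=0)
    parser.add_argument('--dev', action='store_true',
                        help='shrink budgets for a quick smoke test')
    parser.add_argument('--h', action='store_true',
                        help='evaluate a fixed manual (human) design')
    parser.add_argument('--extra', type=str, default='',
                        help='suffix appended to the experiment folder name')
    parser.add_argument('--debug', action='store_true')
    main(parser.parse_args())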