def __init__(self, config, replay_buffer, predict_reward_and_status_func):
    """Wrap a replay buffer with a reward/status prediction function for augmentation."""
    self.config = config
    self.replay_buffer = replay_buffer
    self.predict_reward_and_status_func = predict_reward_and_status_func
    # the last configured potential point serves as the target point
    self.target_potential_point = PotentialPoint.from_config(config)[-1]
    # transitions are staged here before being added to the replay buffer
    self.augmented_buffer = []
def _evaluate_and_report(label, pre_trained_reward_network, test, test_batch_size, openrave_manager, sess):
    """Run one random oversampled batch through the reward network and print per-class stats.

    `label` is printed as a header line before the goal/collision/other statistics.
    """
    random.shuffle(test)
    test_batch = oversample_batch(test, 0, test_batch_size)
    test_batch, test_rewards, test_status = get_batch_and_labels(test_batch, openrave_manager)
    reward_prediction, status_prediction = pre_trained_reward_network.make_prediction(*([sess] + test_batch))
    # break prediction error statistics down by transition outcome class
    goal_rewards_stats, collision_rewards_stats, other_rewards_stats = compute_stats_per_class(
        test_status, test_rewards, status_prediction, reward_prediction)
    print(label)
    print("goal mean_error {} max_error {} accuracy {}".format(*goal_rewards_stats))
    print("collision mean_error {} max_error {} accuracy {}".format(*collision_rewards_stats))
    print("other mean_error {} max_error {} accuracy {}".format(*other_rewards_stats))


def print_model_stats(pre_trained_reward_network, test_batch_size, sess):
    """Print reward-network per-class stats before and after loading trained weights.

    The "before" pass runs on freshly initialized variables, so the difference
    between the two reports shows the effect of the loaded checkpoint.
    The duplicated evaluate-and-print sequence of the original was factored
    into _evaluate_and_report.
    """
    # read the evaluation data
    test = load_data_from(os.path.join("supervised_data", "test"), max_read=10 * test_batch_size)
    print(len(test))
    random.shuffle(test)
    openrave_manager = OpenraveManager(0.001, PotentialPoint.from_config(pre_trained_reward_network.config))
    sess.run(tf.global_variables_initializer())
    _evaluate_and_report("before loading weights", pre_trained_reward_network, test, test_batch_size,
                         openrave_manager, sess)
    pre_trained_reward_network.load_weights(sess)
    _evaluate_and_report("after loading weights", pre_trained_reward_network, test, test_batch_size,
                         openrave_manager, sess)
def _generate_single_workspace(self, workspace_id):
    """Generate random workspaces until one passes the planning-success threshold, then save it."""
    while True:
        iteration_start = datetime.datetime.now()
        workspace_params = self.generator.generate_workspace()
        self.openrave_manager = OpenraveManager(
            self.config['openrave_rl']['segment_validity_step'],
            PotentialPoint.from_config(self.config))
        self.openrave_manager.loaded_params_path = None
        self.openrave_manager.load_params(workspace_params, '')
        successful_trajectories_count = 0
        i = 0
        for i in range(self.test_trajectories):
            # abort early once success is mathematically out of reach
            trajectories_left = self.test_trajectories - i
            if trajectories_left + successful_trajectories_count < self.trajectories_required_to_pass:
                print('no hope to get the required ratio')
                break
            # a planning attempt counts as a success when it returns a trajectory
            successful_trajectories_count += self._try_plan(workspace_params, self.openrave_manager) is not None
            # stop as soon as enough successes were observed
            if successful_trajectories_count >= self.trajectories_required_to_pass:
                print('workspace found')
                save_path = os.path.join(output_dir, '{}_workspace.pkl'.format(workspace_id))
                workspace_params.save(save_path)
                return
        iteration_end = datetime.datetime.now()
        print('trajectories tried {}'.format(i))
        print('success count {}'.format(successful_trajectories_count))
        print('time since start {}'.format(iteration_end - iteration_start))
        print('')
def _generate_single_workspace(self, workspace_id):
    """Repeatedly sample workspaces; persist the first one on which planning succeeds often enough."""
    while True:
        attempt_started = datetime.datetime.now()
        workspace_params = self.generator.generate_workspace()
        self.openrave_manager = OpenraveManager(
            self.config["openrave_rl"]["segment_validity_step"],
            PotentialPoint.from_config(self.config),
        )
        self.openrave_manager.loaded_params_path = None
        self.openrave_manager.load_params(workspace_params, "")
        successful_trajectories_count = 0
        i = 0
        for i in range(self.test_trajectories):
            # give up on this workspace once the pass bar can no longer be reached
            remaining = self.test_trajectories - i
            if remaining + successful_trajectories_count < self.trajectories_required_to_pass:
                print("no hope to get the required ratio")
                break
            # _try_plan returns None on failure; count successful attempts
            successful_trajectories_count += (
                self._try_plan(workspace_params, self.openrave_manager) is not None)
            # enough successes — persist the workspace and finish
            if successful_trajectories_count >= self.trajectories_required_to_pass:
                print("workspace found")
                save_path = os.path.join(output_dir, "{}_workspace.pkl".format(workspace_id))
                workspace_params.save(save_path)
                return
        attempt_finished = datetime.datetime.now()
        print("trajectories tried {}".format(i))
        print("success count {}".format(successful_trajectories_count))
        print("time since start {}".format(attempt_finished - attempt_started))
        print("")
def __init__(self, config, rollout_manager, results_directory):
    """Prepare the results directory and persist the configured potential points.

    Also records whether the model consumes workspace images (vision mode).
    """
    self.config = config
    self.rollout_manager = rollout_manager
    self.results_directory = results_directory
    self._make_dir(self.results_directory)
    potential_points_path = os.path.join(self.results_directory, 'potential_points.p')
    # use a context manager so the file is flushed and closed (the original
    # leaked the handle returned by open()).
    # NOTE(review): pickle normally wants binary mode; 'w' is kept because
    # readers elsewhere in the project may open this file in text mode too.
    with open(potential_points_path, 'w') as potential_points_file:
        pickle.dump(PotentialPoint.from_config(config), potential_points_file)
    self._is_vision = config['model']['consider_image']
def get_manager_for_workspace(workspace_id, config):
    """Create an OpenraveManager loaded with the stored params of one workspace.

    Returns a (manager, workspace_directory) tuple.
    """
    base_directory = os.path.abspath(os.path.expanduser(config['data']['directory']))
    workspace_dir = os.path.join(base_directory, workspace_id)
    manager = OpenraveManager(
        config['data']['joint_segment_validity_step'],
        PotentialPoint.from_config(config))
    params_path = data_filepaths.get_workspace_params_path(workspace_dir)
    manager.load_params(WorkspaceParams.load_from_file(params_path))
    return manager, workspace_dir
def __init__(self, config):
    """Read step/goal/penalty settings from config and build the OpenRAVE manager."""
    openrave_rl_config = config['openrave_rl']
    self.action_step_size = openrave_rl_config['action_step_size']
    self.goal_sensitivity = openrave_rl_config['goal_sensitivity']
    self.keep_alive_penalty = openrave_rl_config['keep_alive_penalty']
    self.truncate_penalty = openrave_rl_config['truncate_penalty']
    self.openrave_manager = OpenraveManager(
        openrave_rl_config['segment_validity_step'],
        PotentialPoint.from_config(config))
    # per-episode state, populated later
    self.current_joints = None
    self.goal_joints = None
    self.start_joints = None
    self.traj = None
def produce_transitions(data_dir, cache_dir):
    """Convert raw trajectory files under data_dir into cached per-step transitions.

    Each ``.path_pkl`` file yields one pickled list of
    (joints, next_joints, goal_joints, goal_pose) tuples in cache_dir.
    If cache_dir already exists the whole step is skipped.
    """
    print("producing transition data from original trajectories at {}".format(data_dir))
    assert os.path.exists(data_dir)
    if os.path.exists(cache_dir):
        print("found cache dir at {}, assuming all transitions are present there (if not delete the directory)".format(cache_dir))
        return
    print("cache not found, creating cache at: {}".format(cache_dir))
    if not os.path.exists(cache_dir):
        os.makedirs(cache_dir)
    trajectory_files = [name for name in os.listdir(data_dir) if name.endswith(".path_pkl")]
    assert len(trajectory_files) > 0
    # the pose of the last potential point is used as the goal pose
    target_point = PotentialPoint.from_config(config)[-1]
    for trajectory_file in trajectory_files:
        print("loading file {}".format(trajectory_file))
        with bz2.BZ2File(os.path.join(data_dir, trajectory_file), "r") as compressed_file:
            paths = pickle.load(compressed_file)
        print("asserting step sizes match")
        # small epsilon tolerates float rounding in the stored steps
        step_size = config["openrave_rl"]["action_step_size"] + 0.00001
        for traj, _ in paths:
            for i in range(len(traj) - 1):
                assert np.linalg.norm(np.array(traj[i]) - np.array(traj[i + 1])) < step_size
        print("creating transitions")
        transitions = []
        for traj, poses_trajectory in paths:
            goal_joints = traj[-1]
            goal_pose = poses_trajectory[-1][target_point.tuple]
            for i in range(len(traj) - 1):
                # the first coordinate of every joints vector is dropped
                transitions.append((traj[i][1:], traj[i + 1][1:], goal_joints[1:], goal_pose))
        transition_file = os.path.join(cache_dir, trajectory_file + ".transitions_cache")
        print("writing transitions file {}".format(transition_file))
        # NOTE(review): text mode "w" kept from the original; readers presumably
        # open these files in text mode as well (Python 2 era code)
        with open(transition_file, "w") as pickle_file:
            pickle.dump(transitions, pickle_file)
    print("cache created at {}".format(cache_dir))
def __init__(self, config):
    """Read action/goal/penalty parameters from config and build the OpenRAVE manager."""
    self.openrave_manager = OpenraveManager(
        config["openrave_rl"]["segment_validity_step"],
        PotentialPoint.from_config(config),
    )
    self.action_step_size = config["openrave_rl"]["action_step_size"]
    self.goal_sensitivity = config["openrave_rl"]["goal_sensitivity"]
    self.keep_alive_penalty = config["openrave_rl"]["keep_alive_penalty"]
    self.truncate_penalty = config["openrave_rl"]["truncate_penalty"]
    # per-episode bookkeeping, assigned when an episode starts
    self.current_joints = None
    self.goal_joints = None
    self.start_joints = None
    self.traj = None
def __init__(self, config):
    """Read planner settings (step size, goal tolerance, adaptive iteration budget) from config."""
    rl_section = config['openrave_rl']
    planner_section = config['openrave_planner']
    self.action_step_size = rl_section['action_step_size']
    self.goal_sensitivity = rl_section['goal_sensitivity']
    self.challenging_trajectories_only = planner_section['challenging_trajectories_only']
    self.planner_iterations_start = planner_section['planner_iterations_start']
    self.planner_iterations_increase = planner_section['planner_iterations_increase']
    self.planner_iterations_decrease = planner_section['planner_iterations_decrease']
    # iteration budget begins at the configured start value
    self.max_planner_iterations = self.planner_iterations_start
    self.openrave_manager = OpenraveManager(
        rl_section['segment_validity_step'],
        PotentialPoint.from_config(config))
def __init__(self, config):
    """Planner wrapper: caches step/goal settings and the adaptive iteration limits."""
    planner_settings = config["openrave_planner"]
    self.action_step_size = config["openrave_rl"]["action_step_size"]
    self.goal_sensitivity = config["openrave_rl"]["goal_sensitivity"]
    self.challenging_trajectories_only = planner_settings["challenging_trajectories_only"]
    self.planner_iterations_start = planner_settings["planner_iterations_start"]
    self.planner_iterations_increase = planner_settings["planner_iterations_increase"]
    self.planner_iterations_decrease = planner_settings["planner_iterations_decrease"]
    # current iteration budget; starts from the configured initial value
    self.max_planner_iterations = self.planner_iterations_start
    self.openrave_manager = OpenraveManager(
        config["openrave_rl"]["segment_validity_step"],
        PotentialPoint.from_config(config),
    )
def run_motion_planner():
    """Plan every query `repeat` times and return the accumulated planning time.

    Returns a datetime.timedelta, or None when there are no queries.
    """
    total_time = None
    manager = OpenraveManager(
        config['openrave_rl']['segment_validity_step'],
        PotentialPoint.from_config(config))
    for start_joints, goal_joints, workspace_id, _ in queries:
        manager.set_params(image_cache.items[workspace_id].full_filename)
        for _ in range(repeat):
            planning_started = datetime.datetime.now()
            traj = manager.plan(start_joints, goal_joints, None)
            elapsed = datetime.datetime.now() - planning_started
            total_time = elapsed if total_time is None else total_time + elapsed
    return total_time
np.linalg.norm(np.array(traj[i]) - np.array(traj[i + 1])) < step_size ) paths_file = os.path.join(cache_dir, file + ".paths_cache") print("writing paths file {}".format(paths_file)) with open(paths_file, "w") as pickle_file: pickle.dump(paths, pickle_file) print("cache created at {}".format(cache_dir)) train_original_dir = os.path.join("imitation_data", scenario, "train") train_transitions_dir = os.path.join("imitation_data_transitions", scenario, "train") train_transitions_dir = os.path.join( train_transitions_dir, PotentialPoint.from_config(config)[-1].str ) produce_transitions(train_original_dir, train_transitions_dir) train_paths_dir = os.path.join("imitation_data_paths", scenario, "train") produce_paths(train_original_dir, train_paths_dir) test_original_dir = os.path.join("imitation_data", scenario, "test") test_transitions_dir = os.path.join("imitation_data_transitions", scenario, "test") test_transitions_dir = os.path.join( test_transitions_dir, PotentialPoint.from_config(config)[-1].str ) produce_transitions(test_original_dir, test_transitions_dir) test_paths_dir = os.path.join("imitation_data_paths", scenario, "test") produce_paths(test_original_dir, test_paths_dir)
scenario = 'hard' model_name = '2019_01_25_10_09_04' number_of_imitation_files = 3 sphere_limitation = 1000 imitation_data_path = os.path.abspath(os.path.expanduser(os.path.join('~/ModelBasedDDPG/imitation_data', scenario))) rl_trajectories_data_path = os.path.abspath(os.path.expanduser( os.path.join('~/ModelBasedDDPG/', scenario, 'trajectories', model_name))) # load configuration config_path = os.path.join(os.getcwd(), 'config/config.yml') with open(config_path, 'r') as yml_file: config = yaml.load(yml_file) # load the workspace openrave_manager = OpenraveManager(config['openrave_rl']['segment_validity_step'], PotentialPoint.from_config(config)) def process_poses(target_poses, x_coordinate_range=(0.0, 0.13), z_coordinate_range=(0.3, 0.45)): return [p for p in target_poses if x_coordinate_range[0] <= p[0] <= x_coordinate_range[1] and z_coordinate_range[0] <= p[1] <= z_coordinate_range[1]] def process_rl_files(data_dir, trajectory_limitation): steps_offset = 40 steps_increase = 2000 trajectories_seen = 0 result = [] while trajectories_seen < trajectory_limitation: global_step_dir = os.path.join(data_dir, '{}'.format(steps_offset)) steps_offset += steps_increase for dirpath, dirnames, filenames in os.walk(global_step_dir):
collision_samples = 10000 # show_close_to_goal = True show_close_to_goal = False close_to_goal_samples = 10000 show_pose_action_direction_arrow = True show_goal_end_effector_pose = True # load configuration config_path = os.path.join(os.getcwd(), 'config/config.yml') with open(config_path, 'r') as yml_file: config = yaml.load(yml_file) # load the workspace openrave_manager = OpenraveManager( config['openrave_rl']['segment_validity_step'], PotentialPoint.from_config(config)) params_file = os.path.abspath( os.path.expanduser( os.path.join('~/ModelBasedDDPG/scenario_params', scenario, 'params.pkl'))) openrave_manager.load_params(WorkspaceParams.load_from_file(params_file)) openrave_manager.robot.SetDOFValues([0.0] + goal_joints, [0, 1, 2, 3, 4]) openrave_manager.get_initialized_viewer() red_color = np.array([1.0, 0.0, 0.0]) yellow_color = np.array([1.0, 1.0, 0.0]) green_color = np.array([0.0, 1.0, 0.0]) def create_sphere(id, radius, openrave_manager): body = RaveCreateKinBody(openrave_manager.env, '')
np.array(traj[i]) - np.array(traj[i + 1])) < step_size paths_file = os.path.join(cache_dir, file + '.paths_cache') print 'writing paths file {}'.format(paths_file) with open(paths_file, 'w') as pickle_file: pickle.dump(paths, pickle_file) print 'cache created at {}'.format(cache_dir) train_original_dir = os.path.join('imitation_data', scenario, 'train') train_transitions_dir = os.path.join('imitation_data_transitions', scenario, 'train') train_transitions_dir = os.path.join( train_transitions_dir, PotentialPoint.from_config(config)[-1].str) produce_transitions(train_original_dir, train_transitions_dir) train_paths_dir = os.path.join('imitation_data_paths', scenario, 'train') produce_paths(train_original_dir, train_paths_dir) test_original_dir = os.path.join('imitation_data', scenario, 'test') test_transitions_dir = os.path.join('imitation_data_transitions', scenario, 'test') test_transitions_dir = os.path.join(test_transitions_dir, PotentialPoint.from_config(config)[-1].str) produce_transitions(test_original_dir, test_transitions_dir) test_paths_dir = os.path.join('imitation_data_paths', scenario, 'test') produce_paths(test_original_dir, test_paths_dir) def get_files(paths_dir, transitions_dir, max_files=None):
def __init__(self, config, is_rollout_agent, image_shape=(55, 111), number_of_joints=4, pose_dimensions=2, pre_trained_reward=None, name_prefix=None):
    """Build the TF1 actor/critic graph for this agent.

    Constructs, in order: input placeholders, online and target actor
    networks (with manual weight-assignment ops), and — unless
    is_rollout_agent is True, in which case construction stops after the
    actors — the critics, soft target-update ops, optional
    forward/reward-model branches, and the critic/actor losses,
    optimizers and summaries.

    Args:
        config: nested configuration dict ('model', 'critic', 'actor',
            'action_predictor' sections are read here).
        is_rollout_agent: when True only the actor networks and their
            assign ops are built; the method returns early.
        image_shape: workspace image dimensions for the vision input.
        number_of_joints: size of the joints/action vectors.
        pose_dimensions: dimensionality of goal poses.
        pre_trained_reward: reward-network object; required (asserted)
            when config['model']['use_reward_model'] is set.
        name_prefix: scope prefix; defaults to the current process id.
    """
    self.name_prefix = os.getpid() if name_prefix is None else name_prefix
    self.config = config
    self.potential_points = PotentialPoint.from_config(config)
    # input related data
    self.image_shape = image_shape
    self.number_of_joints = number_of_joints
    self.pose_dimensions = pose_dimensions
    # generate inputs
    all_inputs = self._create_inputs()
    self.joints_inputs = all_inputs[0]
    self.workspace_image_inputs = all_inputs[1]
    self.goal_joints_inputs = all_inputs[2]
    self.goal_pose_inputs = all_inputs[3]
    # images for vision: add a trailing channel dimension when images are used
    self.images_3d = None
    if self.workspace_image_inputs is not None:
        self.images_3d = tf.expand_dims(self.workspace_image_inputs, axis=-1)
    # since we take partial derivatives w.r.t subsets of the parameters, we always need to remember which parameters
    # are currently being added. note that this also causes the model to be non thread safe, therefore the creation
    # must happen sequentially
    # online actor network
    variable_count = len(tf.trainable_variables())
    actor_results = self._create_actor_network(self.joints_inputs, is_online=True, reuse_flag=False)
    self.online_action = actor_results[0]
    online_actor_tanh = actor_results[1]
    # the variables added since the last count belong to the online actor
    self.online_actor_params = tf.trainable_variables()[variable_count:]
    # create placeholders and assign ops to set these weights manually (used by rollout agents)
    self.online_actor_parameter_weights_placeholders = {
        var.name: tf.placeholder(tf.float32, var.get_shape())
        for var in self.online_actor_params
    }
    self.online_actor_parameters_assign_ops = [
        tf.assign(var, self.online_actor_parameter_weights_placeholders[var.name])
        for var in self.online_actor_params
    ]
    # target actor network
    variable_count = len(tf.trainable_variables())
    actor_results = self._create_actor_network(self.joints_inputs, is_online=False, reuse_flag=False)
    self.target_action = actor_results[0]
    self.target_actor_params = tf.trainable_variables()[variable_count:]
    # create placeholders and assign ops to set these weights manually (used by rollout agents)
    self.target_actor_parameter_weights_placeholders = {
        var.name: tf.placeholder(tf.float32, var.get_shape())
        for var in self.target_actor_params
    }
    self.target_actor_parameters_assign_ops = [
        tf.assign(var, self.target_actor_parameter_weights_placeholders[var.name])
        for var in self.target_actor_params
    ]
    # this is as much as a rollout agent needs
    if is_rollout_agent:
        return
    tau = self.config['model']['tau']
    gamma = self.config['model']['gamma']
    use_reward_model = self.config['model']['use_reward_model']
    self.forward_model_next_state, self.forward_model_action, forward_model_tanh = None, None, None
    if use_reward_model:
        # deterministic value of the next state (from current state, executing the online action)
        # NOTE(review): the inner conditional is redundant — use_reward_model is already True here
        self.forward_model_next_state = self._next_state_model() if use_reward_model else None
        # online actor network for the result of the forward model
        variable_count = len(tf.trainable_variables())
        actor_results = self._create_actor_network(self.forward_model_next_state, is_online=True, reuse_flag=True)
        self.forward_model_action = actor_results[0]
        forward_model_tanh = actor_results[1]
        assert variable_count == len(tf.trainable_variables())  # make sure no new parameters were added
    # periodically update target actor with online actor weights (soft update with mixing factor tau)
    self.update_actor_target_params = \
        [self.target_actor_params[i].assign(
            tf.multiply(self.online_actor_params[i], tau) +
            tf.multiply(self.target_actor_params[i], 1. - tau)
        ) for i in range(len(self.target_actor_params))]
    # create inputs for the critic and reward network when using a constant action
    self.action_inputs = tf.placeholder(tf.float32, (None, self.number_of_joints), name='action_inputs')
    # online critic for predicting the q value for a specific joints+action pair
    variable_count = len(tf.trainable_variables())
    self.online_q_value_fixed_action = self._create_critic_network(
        self.joints_inputs, self.action_inputs,
        is_online=True, reuse_flag=False, add_regularization_loss=True)
    online_critic_params = tf.trainable_variables()[variable_count:]
    # online critic for predicting the q value for actor update.
    # if using a reward model, the joint inputs are given by the forward model and so are the actions.
    # if in regular ddpg, the joints inputs are given by the current state inputs, the actions are the policy on
    # these joints.
    variable_count = len(tf.trainable_variables())
    self.online_q_value_under_policy = self._create_critic_network(
        joints_input=self.forward_model_next_state if use_reward_model else self.joints_inputs,
        action_input=self.forward_model_action if use_reward_model else self.online_action,
        is_online=True, reuse_flag=True, add_regularization_loss=False)
    assert variable_count == len(tf.trainable_variables())  # make sure no new parameters were added
    # target critic network, predicting the q value current state under the target policy
    variable_count = len(tf.trainable_variables())
    self.target_q_value_under_policy = self._create_critic_network(
        self.joints_inputs, self.target_action,
        is_online=False, reuse_flag=False, add_regularization_loss=False)
    target_critic_params = tf.trainable_variables()[variable_count:]
    # periodically update target critic with online critic weights (soft update with mixing factor tau)
    self.update_critic_target_params = \
        [target_critic_params[i].assign(
            tf.multiply(online_critic_params[i], tau) +
            tf.multiply(target_critic_params[i], 1. - tau)
        ) for i in range(len(target_critic_params))]
    self.fixed_action_reward, self.fixed_action_termination, self.online_action_reward, self.online_action_termination = None, None, None, None
    if use_reward_model:
        assert pre_trained_reward is not None
        variable_count = len(tf.trainable_variables())
        # reward network to predict the immediate reward of a given action
        self.fixed_action_reward, fixed_action_status = pre_trained_reward.create_reward_network(
            self.joints_inputs, self.action_inputs, self.goal_joints_inputs,
            self.goal_pose_inputs, self.images_3d)
        self.fixed_action_termination = self._compute_termination_from_status(fixed_action_status)
        # reward network to predict the immediate reward of the online policy action
        self.online_action_reward, online_action_status = pre_trained_reward.create_reward_network(
            self.joints_inputs, self.online_action, self.goal_joints_inputs,
            self.goal_pose_inputs, self.images_3d)
        self.online_action_termination = self._compute_termination_from_status(online_action_status)
        # the pre-trained reward network must not add trainable parameters
        assert variable_count == len(tf.trainable_variables())
    # the label to use to train the online critic network
    self.scalar_label = tf.placeholder(tf.float32, [None, 1])
    batch_size = tf.cast(tf.shape(self.joints_inputs)[0], tf.float32)
    # critic optimization
    critic_prediction_loss = tf.losses.mean_squared_error(self.scalar_label, self.online_q_value_fixed_action)
    critic_regularization = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    critic_regularization_loss = tf.add_n(critic_regularization) if len(critic_regularization) > 0 else 0.0
    self.critic_total_loss = critic_prediction_loss + critic_regularization_loss
    self.critic_initial_gradients_norm, self.critic_clipped_gradients_norm, self.optimize_critic = \
        self._optimize_by_loss(
            self.critic_total_loss, online_critic_params,
            self.config['critic']['learning_rate'], self.config['critic']['gradient_limit']
        )
    # summaries for the critic optimization
    self.critic_optimization_summaries = tf.summary.merge([
        tf.summary.scalar('critic_prediction_loss', critic_prediction_loss),
        tf.summary.scalar('critic_regularization_loss', critic_regularization_loss),
        tf.summary.scalar('critic_total_loss', self.critic_total_loss),
        tf.summary.scalar('critic_gradients_norm_initial', self.critic_initial_gradients_norm),
        tf.summary.scalar('critic_gradients_norm_clipped', self.critic_clipped_gradients_norm),
        tf.summary.scalar('critic_mean_prediction', tf.reduce_mean(self.online_q_value_fixed_action)),
        tf.summary.histogram('critic_prediction_distribution', self.online_q_value_fixed_action),
    ])
    # when training the actor we derive the advantage w.r.t mu's network params (mu is the online policy)
    if use_reward_model:
        # advantage is r(s, mu(s)) + \gamma * q(f(s, mu(s)), mu(f(s, mu(s))))
        include_next_state = (1.0 - self.online_action_termination)
        # include_next_state = 1.0
        self.actor_loss = -(
            self.online_action_reward +
            gamma * self.online_q_value_under_policy * include_next_state
            # this is actually the policy on the forward model output
        )
    else:
        # advantage is q(s, mu(s))
        self.actor_loss = -self.online_q_value_under_policy
    self.actor_loss = tf.reduce_sum(self.actor_loss)
    # if we have extra losses for the actor:
    tanh_loss_summary = None
    if self.config['action_predictor']['tanh_preactivation_loss_coefficient'] > 0.0:
        # penalize large tanh pre-activations of the online actor
        tanh_preactivation_loss = tf.losses.mean_squared_error(
            tf.zeros_like(online_actor_tanh), online_actor_tanh)
        if use_reward_model:
            forward_model_tanh_preactivation_loss = tf.losses.mean_squared_error(
                tf.zeros_like(forward_model_tanh), forward_model_tanh)
            tanh_preactivation_loss += forward_model_tanh_preactivation_loss
        tanh_preactivation_loss *= self.config['action_predictor']['tanh_preactivation_loss_coefficient']
        self.actor_loss += tanh_preactivation_loss
        tanh_loss_summary = tf.summary.scalar('tanh_preactivation_loss', tanh_preactivation_loss)
    # divide by the batch size
    self.actor_loss = tf.div(self.actor_loss, batch_size)
    self.actor_initial_gradients_norm, self.actor_clipped_gradients_norm, self.optimize_actor = \
        self._optimize_by_loss(
            self.actor_loss, self.online_actor_params,
            self.config['actor']['learning_rate'], self.config['actor']['gradient_limit']
        )
    # summaries for the optimization
    merge_list = [
        tf.summary.scalar('actor_gradients_norm_initial', self.actor_initial_gradients_norm),
        tf.summary.scalar('actor_gradients_norm_clipped', self.actor_clipped_gradients_norm),
        tf.summary.scalar('actor_total_loss', self.actor_loss),
    ]
    if tanh_loss_summary is not None:
        merge_list.append(tanh_loss_summary)
    self.actor_optimization_summaries = tf.summary.merge(merge_list)
# build the train/test oversamplers, the openrave manager, summary writers and
# the model save directory, and persist a copy of the configuration
number_of_unzippers = config['general']['number_of_unzippers']
train = Oversampler(train_data_dir, batch_size, oversample_goal, oversample_collision,
                    number_of_unzippers=number_of_unzippers)
test = Oversampler(test_data_dir, batch_size, oversample_goal, oversample_collision,
                   number_of_unzippers=number_of_unzippers)
# get openrave manager
openrave_manager = OpenraveManager(0.001, PotentialPoint.from_config(config))
# set summaries and saver dir
summaries_dir = os.path.join('reward', 'tensorboard')
train_summary_writer = tf.summary.FileWriter(os.path.join(summaries_dir, 'train_' + model_name))
test_summary_writer = tf.summary.FileWriter(os.path.join(summaries_dir, 'test_' + model_name))
saver_dir = os.path.join('reward', 'model', model_name)
if not os.path.exists(saver_dir):
    os.makedirs(saver_dir)
# save the config; use a context manager so the handle is flushed and closed
# (the original passed an unclosed open() handle to yaml.dump)
config_copy_path = os.path.join(saver_dir, 'config.yml')
with open(config_copy_path, 'w') as config_copy_file:
    yaml.dump(config, config_copy_file)
import tensorflow as tf
import os
import yaml

from openrave_manager import OpenraveManager
from potential_point import PotentialPoint

# smoke test: check GPU visibility and that the OpenRAVE wrapper can sample joints
is_gpu = tf.test.is_gpu_available()

config_path = os.path.join(os.getcwd(), 'config/config.yml')
with open(config_path, 'r') as yml_file:
    # NOTE(review): yaml.load without an explicit Loader is unsafe on untrusted input
    config = yaml.load(yml_file)

potential_points = PotentialPoint.from_config(config)
openrave_manager = OpenraveManager(0.01, potential_points)
random_joints = openrave_manager.get_random_joints()

print('has gpu result {}'.format(is_gpu))
print('random joints result {}'.format(random_joints))