def print_model_stats(pre_trained_reward_network, test_batch_size, sess):
    # read the data
    test = load_data_from(os.path.join('supervised_data', 'test'),
                          max_read=10 * test_batch_size)
    print len(test)

    # shuffle the test data
    random.shuffle(test)

    openrave_manager = OpenraveManager(
        0.001, PotentialPoint.from_config(pre_trained_reward_network.config))

    sess.run(tf.global_variables_initializer())

    # run test for one (random) batch
    random.shuffle(test)
    test_batch = oversample_batch(test, 0, test_batch_size)
    test_batch, test_rewards, test_status = get_batch_and_labels(
        test_batch, openrave_manager)
    reward_prediction, status_prediction = pre_trained_reward_network.make_prediction(
        *([sess] + test_batch))
    # see what happens for different reward classes:
    goal_rewards_stats, collision_rewards_stats, other_rewards_stats = compute_stats_per_class(
        test_status, test_rewards, status_prediction, reward_prediction)
    print 'before loading weights'
    print 'goal mean_error {} max_error {} accuracy {}'.format(
        *goal_rewards_stats)
    print 'collision mean_error {} max_error {} accuracy {}'.format(
        *collision_rewards_stats)
    print 'other mean_error {} max_error {} accuracy {}'.format(
        *other_rewards_stats)

    # load weights
    pre_trained_reward_network.load_weights(sess)
    # run test for one (random) batch
    random.shuffle(test)

    test_batch = oversample_batch(test, 0, test_batch_size)
    test_batch, test_rewards, test_status = get_batch_and_labels(
        test_batch, openrave_manager)
    reward_prediction, status_prediction = pre_trained_reward_network.make_prediction(
        *([sess] + test_batch))
    # see what happens for different reward classes:
    goal_rewards_stats, collision_rewards_stats, other_rewards_stats = compute_stats_per_class(
        test_status, test_rewards, status_prediction, reward_prediction)
    print 'after loading weights'
    print 'goal mean_error {} max_error {} accuracy {}'.format(
        *goal_rewards_stats)
    print 'collision mean_error {} max_error {} accuracy {}'.format(
        *collision_rewards_stats)
    print 'other mean_error {} max_error {} accuracy {}'.format(
        *other_rewards_stats)
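
# compute_stats_per_class is only referenced above; a minimal sketch of what such a
# helper could compute, assuming test_status holds an integer class per sample and
# status_prediction is a per-class score matrix. this is an illustration under those
# assumptions, not the repository's implementation.
import numpy as np


def compute_stats_per_class(test_status, test_rewards, status_prediction, reward_prediction):
    # assumed status encoding: 1 = other, 2 = collision, 3 = goal
    test_status = np.asarray(test_status)
    test_rewards = np.asarray(test_rewards).flatten()
    predicted_status = np.argmax(np.asarray(status_prediction), axis=1) + 1
    predicted_rewards = np.asarray(reward_prediction).flatten()

    def stats_for(class_id):
        mask = test_status == class_id
        if not np.any(mask):
            return 0.0, 0.0, 0.0
        errors = np.abs(test_rewards[mask] - predicted_rewards[mask])
        accuracy = np.mean(predicted_status[mask] == class_id)
        return np.mean(errors), np.max(errors), accuracy

    # order matches the prints above: goal, collision, other
    return stats_for(3), stats_for(2), stats_for(1)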
Example #2
    def __init__(self, config):
        self.action_step_size = config['openrave_rl']['action_step_size']
        self.goal_sensitivity = config['openrave_rl']['goal_sensitivity']
        self.keep_alive_penalty = config['openrave_rl']['keep_alive_penalty']
        self.truncate_penalty = config['openrave_rl']['truncate_penalty']

        self.openrave_manager = OpenraveManager(
            config['openrave_rl']['segment_validity_step'], PotentialPoint.from_config(config))

        self.current_joints = None
        self.goal_joints = None
        self.start_joints = None
        self.traj = None
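
# the constructor above reads only a handful of config keys; an illustrative
# fragment covering them (values are made up, not taken from the repository):
example_config = {
    'openrave_rl': {
        'action_step_size': 0.05,        # illustrative value
        'goal_sensitivity': 0.01,        # illustrative value
        'keep_alive_penalty': 0.01,      # illustrative value
        'truncate_penalty': 0.0,         # illustrative value
        'segment_validity_step': 0.001,  # illustrative value
    }
}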
def produce_transitions(data_dir, cache_dir):
    print "producing transition data from original trajectories at {}".format(
        data_dir)
    assert os.path.exists(data_dir)

    if os.path.exists(cache_dir):
        print "found cache dir at {}, assuming all transitions are present there (if not delete the directory)".format(
            cache_dir)
        return

    print "cache not found, creating cache at: {}".format(cache_dir)
    if not os.path.exists(cache_dir):
        os.makedirs(cache_dir)
    files = [
        file for file in os.listdir(data_dir) if file.endswith(".path_pkl")
    ]
    assert len(files) > 0
    target_point = PotentialPoint.from_config(config)[-1]
    for file in files:
        print "loading file {}".format(file)
        with bz2.BZ2File(os.path.join(data_dir, file), "r") as compressed_file:
            paths = pickle.load(compressed_file)

        print "asserting step sizes match"
        step_size = config["openrave_rl"]["action_step_size"] + 0.00001
        for (traj, _) in paths:
            for i in range(len(traj) - 1):
                assert (
                    np.linalg.norm(np.array(traj[i]) - np.array(traj[i + 1])) <
                    step_size)

        print "creating transitions"
        transitions = []
        for (traj, poses_trajectory) in paths:
            goal_joints = traj[-1]
            goal_pose = poses_trajectory[-1][target_point.tuple]
            for i in range(len(traj) - 1):
                joints = traj[i]
                next_joints = traj[i + 1]
                transition = (joints[1:], next_joints[1:], goal_joints[1:],
                              goal_pose)
                transitions.append(transition)

        transition_file = os.path.join(cache_dir, file + ".transitions_cache")
        print "writing transitions file {}".format(transition_file)
        with open(transition_file, "w") as pickle_file:
            pickle.dump(transitions, pickle_file)
        # with bz2.BZ2File(transition_file, 'w') as compressed_file:
        #     pickle.dump(transitions, compressed_file)

    print "cache created at {}".format(cache_dir)
Example #5
    def __init__(self, config):
        self.action_step_size = config['openrave_rl']['action_step_size']
        self.goal_sensitivity = config['openrave_rl']['goal_sensitivity']
        self.challenging_trajectories_only = config['openrave_planner'][
            'challenging_trajectories_only']
        self.planner_iterations_start = config['openrave_planner'][
            'planner_iterations_start']
        self.planner_iterations_increase = config['openrave_planner'][
            'planner_iterations_increase']
        self.planner_iterations_decrease = config['openrave_planner'][
            'planner_iterations_decrease']
        self.max_planner_iterations = self.planner_iterations_start

        self.openrave_manager = OpenraveManager(
            config['openrave_rl']['segment_validity_step'],
            PotentialPoint.from_config(config))
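
# as above, the planner wrapper reads its settings from the config; an illustrative
# fragment with the keys it touches (values are made up, not taken from the repository):
example_config = {
    'openrave_rl': {
        'action_step_size': 0.05,        # illustrative value
        'goal_sensitivity': 0.01,        # illustrative value
        'segment_validity_step': 0.001,  # illustrative value
    },
    'openrave_planner': {
        'challenging_trajectories_only': True,  # illustrative value
        'planner_iterations_start': 100,        # illustrative value
        'planner_iterations_increase': 10,      # illustrative value
        'planner_iterations_decrease': 1,       # illustrative value
    },
}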
def run_motion_planner():
    result = None
    openrave_manager = OpenraveManager(
        config['openrave_rl']['segment_validity_step'],
        PotentialPoint.from_config(config))
    for start_joints, goal_joints, workspace_id, _ in queries:
        params_file_path = image_cache.items[workspace_id].full_filename
        openrave_manager.set_params(params_file_path)
        for i in range(repeat):
            start_time = datetime.datetime.now()
            traj = openrave_manager.plan(start_joints, goal_joints, None)
            # assert traj is not None
            end_time = datetime.datetime.now()
            time_diff = end_time - start_time
            if result is None:
                result = time_diff
            else:
                result += time_diff
    return result
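
# usage sketch: report the accumulated planning time (queries and repeat are assumed
# to be module-level globals, as used inside the function above)
total_planning_time = run_motion_planner()
if total_planning_time is not None:
    runs = len(queries) * repeat
    print 'planned {} times, total time {}, average per run {}'.format(
        runs, total_planning_time, total_planning_time / runs)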
Example #8
scenario = 'hard'
model_name = '2019_01_25_10_09_04'
number_of_imitation_files = 3
sphere_limitation = 1000

imitation_data_path = os.path.abspath(os.path.expanduser(
    os.path.join('~/ModelBasedDDPG/imitation_data', scenario)))
rl_trajectories_data_path = os.path.abspath(os.path.expanduser(
    os.path.join('~/ModelBasedDDPG/', scenario, 'trajectories', model_name)))

# load configuration
config_path = os.path.join(os.getcwd(), 'config/config.yml')
with open(config_path, 'r') as yml_file:
    config = yaml.safe_load(yml_file)

# load the workspace
openrave_manager = OpenraveManager(config['openrave_rl']['segment_validity_step'], PotentialPoint.from_config(config))


def process_poses(target_poses, x_coordinate_range=(0.0, 0.13), z_coordinate_range=(0.3, 0.45)):
    return [
        p for p in target_poses
        if x_coordinate_range[0] <= p[0] <= x_coordinate_range[1]
        and z_coordinate_range[0] <= p[1] <= z_coordinate_range[1]
    ]
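
# example: the second pose falls outside the x coordinate range and is filtered out
print process_poses([(0.05, 0.4), (0.5, 0.1)])  # -> [(0.05, 0.4)]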


def process_rl_files(data_dir, trajectory_limitation):
    steps_offset = 40
    steps_increase = 2000
    trajectories_seen = 0
    result = []
    while trajectories_seen < trajectory_limitation:
        global_step_dir = os.path.join(data_dir, '{}'.format(steps_offset))
        steps_offset += steps_increase
        for dirpath, dirnames, filenames in os.walk(global_step_dir):
            pass  # the body of this loop is truncated in the source listing


collision_samples = 10000
# show_close_to_goal = True
show_close_to_goal = False
close_to_goal_samples = 10000
show_pose_action_direction_arrow = True
show_goal_end_effector_pose = True

# load configuration
config_path = os.path.join(os.getcwd(), 'config/config.yml')
with open(config_path, 'r') as yml_file:
    config = yaml.safe_load(yml_file)

# load the workspace
openrave_manager = OpenraveManager(
    config['openrave_rl']['segment_validity_step'],
    PotentialPoint.from_config(config))
params_file = os.path.abspath(
    os.path.expanduser(
        os.path.join('~/ModelBasedDDPG/scenario_params', scenario,
                     'params.pkl')))
openrave_manager.load_params(WorkspaceParams.load_from_file(params_file))
openrave_manager.robot.SetDOFValues([0.0] + goal_joints, [0, 1, 2, 3, 4])

openrave_manager.get_initialized_viewer()
red_color = np.array([1.0, 0.0, 0.0])
yellow_color = np.array([1.0, 1.0, 0.0])
green_color = np.array([0.0, 1.0, 0.0])


def create_sphere(id, radius, openrave_manager):
    body = RaveCreateKinBody(openrave_manager.env, '')

        # tail of produce_paths (the paths analogue of produce_transitions above)
                assert (
                    np.linalg.norm(np.array(traj[i]) - np.array(traj[i + 1])) <
                    step_size)

        paths_file = os.path.join(cache_dir, file + '.paths_cache')
        print 'writing paths file {}'.format(paths_file)
        with open(paths_file, 'w') as pickle_file:
            pickle.dump(paths, pickle_file)

    print 'cache created at {}'.format(cache_dir)


train_original_dir = os.path.join('imitation_data', scenario, 'train')
train_transitions_dir = os.path.join('imitation_data_transitions', scenario,
                                     'train')
train_transitions_dir = os.path.join(
    train_transitions_dir,
    PotentialPoint.from_config(config)[-1].str)
produce_transitions(train_original_dir, train_transitions_dir)
train_paths_dir = os.path.join('imitation_data_paths', scenario, 'train')
produce_paths(train_original_dir, train_paths_dir)

test_original_dir = os.path.join('imitation_data', scenario, 'test')
test_transitions_dir = os.path.join('imitation_data_transitions', scenario,
                                    'test')
test_transitions_dir = os.path.join(test_transitions_dir,
                                    PotentialPoint.from_config(config)[-1].str)
produce_transitions(test_original_dir, test_transitions_dir)
test_paths_dir = os.path.join('imitation_data_paths', scenario, 'test')
produce_paths(test_original_dir, test_paths_dir)
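
# the calls above leave the caches laid out roughly as follows (derived from the
# os.path.join calls; trajectory file names are placeholders):
#   imitation_data_transitions/<scenario>/train/<potential point str>/<trajectory>.path_pkl.transitions_cache
#   imitation_data_transitions/<scenario>/test/<potential point str>/<trajectory>.path_pkl.transitions_cache
#   imitation_data_paths/<scenario>/train/<trajectory>.path_pkl.paths_cache
#   imitation_data_paths/<scenario>/test/<trajectory>.path_pkl.paths_cache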


def get_files(paths_dir, transitions_dir, max_files=None):
Example #11
    def __init__(self,
                 config,
                 is_rollout_agent,
                 image_shape=(55, 111),
                 number_of_joints=4,
                 pose_dimensions=2,
                 pre_trained_reward=None,
                 name_prefix=None):
        self.name_prefix = os.getpid() if name_prefix is None else name_prefix
        self.config = config
        self.potential_points = PotentialPoint.from_config(config)

        # input related data
        self.image_shape = image_shape
        self.number_of_joints = number_of_joints
        self.pose_dimensions = pose_dimensions

        # generate inputs
        all_inputs = self._create_inputs()
        self.joints_inputs = all_inputs[0]
        self.workspace_image_inputs = all_inputs[1]
        self.goal_joints_inputs = all_inputs[2]
        self.goal_pose_inputs = all_inputs[3]

        # images for vision
        self.images_3d = None
        if self.workspace_image_inputs is not None:
            self.images_3d = tf.expand_dims(self.workspace_image_inputs,
                                            axis=-1)

        # since we take partial derivatives w.r.t. subsets of the parameters, we always need to track which
        # parameters are currently being added. note that this also makes the model non-thread-safe, so the
        # networks must be created sequentially.

        # online actor network
        variable_count = len(tf.trainable_variables())
        actor_results = self._create_actor_network(self.joints_inputs,
                                                   is_online=True,
                                                   reuse_flag=False)
        self.online_action = actor_results[0]
        online_actor_tanh = actor_results[1]
        self.online_actor_params = tf.trainable_variables()[variable_count:]

        # create placeholders and assign ops to set these weights manually (used by rollout agents)
        self.online_actor_parameter_weights_placeholders = {
            var.name: tf.placeholder(tf.float32, var.get_shape())
            for var in self.online_actor_params
        }
        self.online_actor_parameters_assign_ops = [
            tf.assign(
                var,
                self.online_actor_parameter_weights_placeholders[var.name])
            for var in self.online_actor_params
        ]

        # target actor network
        variable_count = len(tf.trainable_variables())
        actor_results = self._create_actor_network(self.joints_inputs,
                                                   is_online=False,
                                                   reuse_flag=False)
        self.target_action = actor_results[0]
        self.target_actor_params = tf.trainable_variables()[variable_count:]

        # create placeholders and assign ops to set these weights manually (used by rollout agents)
        self.target_actor_parameter_weights_placeholders = {
            var.name: tf.placeholder(tf.float32, var.get_shape())
            for var in self.target_actor_params
        }
        self.target_actor_parameters_assign_ops = [
            tf.assign(
                var,
                self.target_actor_parameter_weights_placeholders[var.name])
            for var in self.target_actor_params
        ]

        # this is as much as a rollout agent needs
        if is_rollout_agent:
            return

        tau = self.config['model']['tau']
        gamma = self.config['model']['gamma']
        use_reward_model = self.config['model']['use_reward_model']
        self.forward_model_next_state, self.forward_model_action, forward_model_tanh = None, None, None
        if use_reward_model:
            # deterministic value of the next state (from current state, executing the online action)
            self.forward_model_next_state = self._next_state_model()

            # online actor network for the result of the forward model
            variable_count = len(tf.trainable_variables())
            actor_results = self._create_actor_network(
                self.forward_model_next_state, is_online=True, reuse_flag=True)
            self.forward_model_action = actor_results[0]
            forward_model_tanh = actor_results[1]
            # make sure no new parameters were added
            assert variable_count == len(tf.trainable_variables())

        # periodically update target actor with online actor weights
        self.update_actor_target_params = \
            [self.target_actor_params[i].assign(
                tf.multiply(self.online_actor_params[i], tau) + tf.multiply(self.target_actor_params[i], 1. - tau)
            ) for i in range(len(self.target_actor_params))]

        # create inputs for the critic and reward network when using a constant action
        self.action_inputs = tf.placeholder(tf.float32,
                                            (None, self.number_of_joints),
                                            name='action_inputs')

        # online critic for predicting the q value for a specific joints+action pair
        variable_count = len(tf.trainable_variables())
        self.online_q_value_fixed_action = self._create_critic_network(
            self.joints_inputs,
            self.action_inputs,
            is_online=True,
            reuse_flag=False,
            add_regularization_loss=True)
        online_critic_params = tf.trainable_variables()[variable_count:]

        # online critic for predicting the q value used in the actor update.
        # when using a reward model, both the joints inputs and the actions come from the forward model;
        # in regular ddpg, the joints inputs are the current state and the actions are the policy's output
        # for these joints.
        variable_count = len(tf.trainable_variables())
        self.online_q_value_under_policy = self._create_critic_network(
            joints_input=self.forward_model_next_state
            if use_reward_model else self.joints_inputs,
            action_input=self.forward_model_action
            if use_reward_model else self.online_action,
            is_online=True,
            reuse_flag=True,
            add_regularization_loss=False)
        # make sure no new parameters were added
        assert variable_count == len(tf.trainable_variables())

        # target critic network, predicting the q value of the current state under the target policy
        variable_count = len(tf.trainable_variables())
        self.target_q_value_under_policy = self._create_critic_network(
            self.joints_inputs,
            self.target_action,
            is_online=False,
            reuse_flag=False,
            add_regularization_loss=False)
        target_critic_params = tf.trainable_variables()[variable_count:]

        # periodically update target critic with online critic weights
        self.update_critic_target_params = \
            [target_critic_params[i].assign(
                tf.multiply(online_critic_params[i], tau) + tf.multiply(target_critic_params[i], 1. - tau)
            ) for i in range(len(target_critic_params))]

        self.fixed_action_reward, self.fixed_action_termination, self.online_action_reward, self.online_action_termination = None, None, None, None
        if use_reward_model:
            assert pre_trained_reward is not None
            variable_count = len(tf.trainable_variables())
            # reward network to predict the immediate reward of a given action
            self.fixed_action_reward, fixed_action_status = pre_trained_reward.create_reward_network(
                self.joints_inputs, self.action_inputs,
                self.goal_joints_inputs, self.goal_pose_inputs, self.images_3d)
            self.fixed_action_termination = self._compute_termination_from_status(
                fixed_action_status)
            # reward network to predict the immediate reward of the online policy action
            self.online_action_reward, online_action_status = pre_trained_reward.create_reward_network(
                self.joints_inputs, self.online_action,
                self.goal_joints_inputs, self.goal_pose_inputs, self.images_3d)
            self.online_action_termination = self._compute_termination_from_status(
                online_action_status)
            assert variable_count == len(tf.trainable_variables())

        # the label to use to train the online critic network
        self.scalar_label = tf.placeholder(tf.float32, [None, 1])

        batch_size = tf.cast(tf.shape(self.joints_inputs)[0], tf.float32)

        # critic optimization
        critic_prediction_loss = tf.losses.mean_squared_error(
            self.scalar_label, self.online_q_value_fixed_action)
        critic_regularization = tf.get_collection(
            tf.GraphKeys.REGULARIZATION_LOSSES)
        critic_regularization_loss = tf.add_n(
            critic_regularization) if len(critic_regularization) > 0 else 0.0
        self.critic_total_loss = critic_prediction_loss + critic_regularization_loss

        self.critic_initial_gradients_norm, self.critic_clipped_gradients_norm, self.optimize_critic = \
            self._optimize_by_loss(
                self.critic_total_loss, online_critic_params, self.config['critic']['learning_rate'],
                self.config['critic']['gradient_limit']
            )

        # summaries for the critic optimization
        self.critic_optimization_summaries = tf.summary.merge([
            tf.summary.scalar('critic_prediction_loss',
                              critic_prediction_loss),
            tf.summary.scalar('critic_regularization_loss',
                              critic_regularization_loss),
            tf.summary.scalar('critic_total_loss', self.critic_total_loss),
            tf.summary.scalar('critic_gradients_norm_initial',
                              self.critic_initial_gradients_norm),
            tf.summary.scalar('critic_gradients_norm_clipped',
                              self.critic_clipped_gradients_norm),
            tf.summary.scalar('critic_mean_prediction',
                              tf.reduce_mean(
                                  self.online_q_value_fixed_action)),
            tf.summary.histogram('critic_prediction_distribution',
                                 self.online_q_value_fixed_action),
        ])

        # when training the actor we derive the advantage w.r.t mu's network params (mu is the online policy)
        if use_reward_model:
            # advantage is r(s, mu(s)) + \gamma * q(f(s, mu(s)), mu(f(s, mu(s))))
            include_next_state = (1.0 - self.online_action_termination)
            # include_next_state = 1.0
            self.actor_loss = -(
                self.online_action_reward +
                gamma * self.online_q_value_under_policy * include_next_state
                # this is actually the policy on the forward model output
            )
        else:
            # advantage is q(s, mu(s))
            self.actor_loss = -self.online_q_value_under_policy
        self.actor_loss = tf.reduce_sum(self.actor_loss)
        # if we have extra losses for the actor:
        tanh_loss_summary = None
        if self.config['action_predictor'][
                'tanh_preactivation_loss_coefficient'] > 0.0:
            tanh_preactivation_loss = tf.losses.mean_squared_error(
                tf.zeros_like(online_actor_tanh), online_actor_tanh)
            if use_reward_model:
                forward_model_tanh_preactivation_loss = tf.losses.mean_squared_error(
                    tf.zeros_like(forward_model_tanh), forward_model_tanh)
                tanh_preactivation_loss += forward_model_tanh_preactivation_loss
            tanh_preactivation_loss *= self.config['action_predictor'][
                'tanh_preactivation_loss_coefficient']
            self.actor_loss += tanh_preactivation_loss
            tanh_loss_summary = tf.summary.scalar('tanh_preactivation_loss',
                                                  tanh_preactivation_loss)

        # divide by the batch size
        self.actor_loss = tf.div(self.actor_loss, batch_size)

        self.actor_initial_gradients_norm, self.actor_clipped_gradients_norm, self.optimize_actor = \
            self._optimize_by_loss(
                self.actor_loss, self.online_actor_params, self.config['actor']['learning_rate'],
                self.config['actor']['gradient_limit']
            )

        # summaries for the optimization
        merge_list = [
            tf.summary.scalar('actor_gradients_norm_initial',
                              self.actor_initial_gradients_norm),
            tf.summary.scalar('actor_gradients_norm_clipped',
                              self.actor_clipped_gradients_norm),
            tf.summary.scalar('actor_total_loss', self.actor_loss),
        ]
        if tanh_loss_summary is not None:
            merge_list.append(tanh_loss_summary)
        self.actor_optimization_summaries = tf.summary.merge(merge_list)
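
# _optimize_by_loss is referenced above but not shown. from its call sites it takes
# (loss, parameter list, learning rate, gradient limit) and returns the gradient norm
# before clipping, the norm after clipping and the train op. a minimal TF1 sketch of
# such a helper, under those assumptions (not the repository's implementation):
def _optimize_by_loss(loss, parameters, learning_rate, gradient_limit):
    optimizer = tf.train.AdamOptimizer(learning_rate)
    gradients = tf.gradients(loss, parameters)
    initial_gradients_norm = tf.global_norm(gradients)
    if gradient_limit > 0.0:
        # clip by global norm, reusing the norm already computed above
        gradients, _ = tf.clip_by_global_norm(
            gradients, gradient_limit, use_norm=initial_gradients_norm)
    clipped_gradients_norm = tf.global_norm(gradients)
    optimize_op = optimizer.apply_gradients(zip(gradients, parameters))
    return initial_gradients_norm, clipped_gradients_norm, optimize_op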
Example #12
number_of_unzippers = config['general']['number_of_unzippers']

train = Oversampler(train_data_dir,
                    batch_size,
                    oversample_goal,
                    oversample_collision,
                    number_of_unzippers=number_of_unzippers)
test = Oversampler(test_data_dir,
                   batch_size,
                   oversample_goal,
                   oversample_collision,
                   number_of_unzippers=number_of_unzippers)

# get openrave manager
openrave_manager = OpenraveManager(0.001, PotentialPoint.from_config(config))

# set summaries and saver dir
summaries_dir = os.path.join('reward', 'tensorboard')
train_summary_writer = tf.summary.FileWriter(
    os.path.join(summaries_dir, 'train_' + model_name))
test_summary_writer = tf.summary.FileWriter(
    os.path.join(summaries_dir, 'test_' + model_name))
saver_dir = os.path.join('reward', 'model', model_name)
if not os.path.exists(saver_dir):
    os.makedirs(saver_dir)

# save the config
config_copy_path = os.path.join(saver_dir, 'config.yml')
with open(config_copy_path, 'w') as config_copy_file:
    yaml.dump(config, config_copy_file)
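
# rough usage sketch for the objects above: a TF1 train/eval loop that writes summaries
# and periodically saves checkpoints. run_train_step and run_test_step are hypothetical
# helpers that would run one oversampled batch and return a merged summary.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver(max_to_keep=4)
    for global_step in range(10000):  # illustrative step count
        train_summary = run_train_step(sess, train)
        train_summary_writer.add_summary(train_summary, global_step)
        if global_step % 1000 == 0:
            test_summary = run_test_step(sess, test)
            test_summary_writer.add_summary(test_summary, global_step)
            saver.save(sess, os.path.join(saver_dir, 'reward_model'),
                       global_step=global_step)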
Example #13
import tensorflow as tf
import os
import yaml

from openrave_manager import OpenraveManager
from potential_point import PotentialPoint

is_gpu = tf.test.is_gpu_available()
config_path = os.path.join(os.getcwd(), 'config/config.yml')
with open(config_path, 'r') as yml_file:
    config = yaml.safe_load(yml_file)
potential_points = PotentialPoint.from_config(config)
openrave_manager = OpenraveManager(0.01, potential_points)
random_joints = openrave_manager.get_random_joints()

print 'has gpu result {}'.format(is_gpu)
print 'random joints result {}'.format(random_joints)
#     import matplotlib.pyplot as plt
#
#     fig = plt.figure()
#     ax = fig.add_subplot(111, projection='3d')
#     ax.scatter([t[0] for t in transformed], [t[1] for t in transformed], [t[2] for t in transformed])
#     ax.set_xlabel('X Label')
#     ax.set_ylabel('Y Label')
#     ax.set_zlabel('Z Label')
#
#     plt.show()
#
#     print 'here'

if __name__ == "__main__":
    potential_points = [
        PotentialPoint(t) for t in [(4, 0.0, 0.0), (5, 0.0, 0.0)]
    ]
    m = OpenraveManager(0.01, potential_points)
    joints0 = [0.0] * 5
    res1 = m.get_potential_points_poses(joints0)
    res2 = m.get_links_poses(joints0)
    print res1[potential_points[0].tuple] == res2[m.links_names[
        potential_points[0].link]]
    print res1[potential_points[1].tuple] == res2[m.links_names[
        potential_points[1].link]]

    res3 = m.get_potential_points_jacobians(joints0)
    res4 = m.get_links_jacobians(joints0)
    print res3[potential_points[0].tuple] == res4[m.links_names[
        potential_points[0].link]]
    print res3[potential_points[1].tuple] == res4[m.links_names[
        potential_points[1].link]]
