Beispiel #1
0
    def sim_mode(self, emb_model_path, ctr_model_path, state_path):
        """Switch this object into simulation mode.

        Restores the embedding and control networks onto ``self.device``
        and loads the state-normalization constants as float32 torch
        tensors on the same device.
        """
        dev = self.device

        # Restore both networks on the target device.
        self.load_emb_net(emb_model_path, dev)
        self.load_ctr_net(ctr_model_path, dev)

        # Normalization constants come back as raw arrays; convert to
        # float32 torch tensors and move them to the device.
        raw_scale, raw_bias = load_scale_and_bias(state_path)
        self.scale = torch.from_numpy(np.array(raw_scale, np.float32)).to(dev)
        self.bias = torch.from_numpy(np.array(raw_bias, np.float32)).to(dev)
Beispiel #2
0
    def __init__(self, model, scale_bias_file, sess):
        """Wrap a model/session pair with optional state normalization.

        Args:
            model: the policy/model object driven through ``sess``.
            scale_bias_file: path to a pickled (scale, bias) pair; when
                falsy, normalization is disabled.
            sess: TensorFlow session used to run the model.
        """
        self.sess = sess
        self.model = model

        if scale_bias_file:
            self.scale, self.bias = load_scale_and_bias(scale_bias_file)
        else:
            # Fix: previously only `self.scale` was cleared here, leaving
            # `self.bias` undefined and raising AttributeError later.
            self.scale = None
            self.bias = None
Beispiel #3
0
    def __init__(self, model, scale_bias_file, sess):
        """Wrap a model/session pair with zero demo placeholders and
        optional state normalization.

        Args:
            model: the policy/model object driven through ``sess``.
            scale_bias_file: path to a pickled (scale, bias) pair; when
                falsy, normalization is disabled.
            sess: TensorFlow session used to run the model.
        """
        self.sess = sess
        self.model = model
        # All-zero demo placeholders: 1 task x 100 timesteps, 20-dim
        # states and 7-dim actions (dims match the sim_push setup).
        self.zero_demoX = np.zeros([1, 100, 20])
        self.zero_demoU = np.zeros([1, 100, 7])

        if scale_bias_file:
            self.scale, self.bias = load_scale_and_bias(scale_bias_file)
        else:
            # Fix: previously only `self.scale` was cleared here, leaving
            # `self.bias` undefined and raising AttributeError later.
            self.scale = None
            self.bias = None
Beispiel #4
0
    def __init__(self, model, scale_bias_file, sess, graph):
        """Wrap a model/session pair with a Reptile meta-learner and
        optional state normalization.

        Args:
            model: the policy/model object driven through ``sess``.
            scale_bias_file: path to a pickled (scale, bias) pair; when
                falsy, normalization is disabled.
            sess: TensorFlow session used to run the model.
            graph: TF graph holding the trainable variables under the
                'model' scope.
        """
        self.sess = sess
        self.model = model

        with graph.as_default():
            variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                          scope='model')
            # Restrict meta-learning to weight/bias variables by name.
            variables = [
                v for v in variables if ('b' in v.name or 'w' in v.name)
            ]
        # TODO: try transductive and non-transductive options
        self.reptile = Reptile(sess,
                               graph,
                               variables=variables,
                               transductive=False,
                               pre_step_op=None)

        if scale_bias_file:
            self.scale, self.bias = load_scale_and_bias(scale_bias_file)
        else:
            # Fix: previously only `self.scale` was cleared here, leaving
            # `self.bias` undefined and raising AttributeError later.
            self.scale = None
            self.bias = None
Beispiel #5
0
    def __init__(self,
                 demo_dir="../mil/data/sim_push/",
                 state_path=None,
                 num_batch_tasks=1,
                 train_n_shot=1,
                 test_n_shot=1,
                 valid=False,
                 val_size=None):
        """Index the sim-push demo directory and load the state
        normalization constants as float32 torch tensors.

        Args:
            demo_dir: directory with per-task gif dirs ("object_*") and
                pickle files.
            state_path: path to the pickled (scale, bias) pair.
            num_batch_tasks: tasks per batch; the task count is trimmed
                so it divides evenly.
            train_n_shot / test_n_shot: shots used at train/test time.
            valid: when a validation split exists, select it instead of
                the training split.
            val_size: truthy enables the validation split (size is
                currently fixed to num_batch_tasks).
        """
        self.demo_dir = demo_dir
        self.train_n_shot = train_n_shot
        self.test_n_shot = test_n_shot
        self.valid = valid
        self.val_size = val_size

        # Build the demo cache on first use.
        if not os.path.exists(os.path.join(demo_dir, "cache")):
            make_cache(demo_dir)
        else:
            print("cache exists")

        # Gif dirs & pickles for every task, trimmed so the task count is
        # a multiple of num_batch_tasks.
        gif_dirs = natsorted(glob.glob(os.path.join(demo_dir, "object_*")))
        pkl_files = natsorted(glob.glob(os.path.join(demo_dir, "*.pkl")))
        self.n_tasks = len(gif_dirs) - len(gif_dirs) % num_batch_tasks
        print("n_tasks:", self.n_tasks)

        gif_dirs = gif_dirs[:self.n_tasks]
        pkl_files = pkl_files[:self.n_tasks]

        # Carve off a validation split when requested and expose only the
        # selected split through self.n_tasks.
        if val_size:
            self.n_valid = num_batch_tasks  # int(self.n_tasks*val_size)
            self.n_train = self.n_tasks - self.n_valid
            self.n_tasks = self.n_valid if valid else self.n_train

        # Normalization constants as float32 torch tensors (CPU).
        self.scale, self.bias = load_scale_and_bias(state_path)
        self.scale = torch.from_numpy(np.array(self.scale, np.float32))
        self.bias = torch.from_numpy(np.array(self.bias, np.float32))
Beispiel #6
0
def evaluate_vision_reach(env, graph, model, data_generator, sess, exp_string, record_gifs, log_dir):
    """Roll out the vision-reach policy on every selected demo/condition,
    score success by final-distance threshold, and optionally record gifs.

    Python 2 code (print statements, xrange). Writes the per-experiment
    success rate to logs/log_sim_vision_reach.txt.
    """
    T = model.T
    # State normalization learned at training time; applied to demo and
    # live states before feeding the network.
    scale, bias = load_scale_and_bias('data/scale_and_bias_sim_vision_reach.pkl')
    successes = []
    selected_demo = data_generator.selected_demo
    if record_gifs:
        record_gifs_dir = os.path.join(log_dir, 'evaluated_gifs')
        mkdir_p(record_gifs_dir)
    # One iteration per task i; one inner iteration per reach condition j.
    for i in xrange(len(selected_demo['selected_demoX'])):
        selected_demoO = selected_demo['selected_demoO'][i]
        selected_demoX = selected_demo['selected_demoX'][i]
        selected_demoU = selected_demo['selected_demoU'][i]
        if record_gifs:
            gifs_dir = os.path.join(record_gifs_dir, 'color_%d' % i)
            mkdir_p(gifs_dir)
        for j in xrange(REACH_DEMO_CONDITIONS):
            # Only evaluate conditions this task actually has demos for.
            if j in data_generator.demos[i]['demoConditions']:
                dists = []
                # ob = env.reset()
                # use env.set_state here to arrange blocks
                Os = []  # frames collected for the optional gif
                for t in range(T):
                    # import pdb; pdb.set_trace()
                    env.render()
                    time.sleep(0.05)
                    obs, state = env.env.get_current_image_obs()
                    Os.append(obs)
                    # HWC uint8 image -> normalized, flattened (1,1,-1)
                    # network input (note the [2,1,0] transpose).
                    obs = np.transpose(obs, [2, 1, 0]) / 255.0
                    obs = obs.reshape(1, 1, -1)
                    state = state.reshape(1, 1, -1)
                    feed_dict = {
                        model.obsa: selected_demoO,
                        model.statea: selected_demoX.dot(scale) + bias,
                        model.actiona: selected_demoU,
                        model.obsb: obs,
                        model.stateb: state.dot(scale) + bias
                    }
                    with graph.as_default():
                        action = sess.run(model.test_act_op, feed_dict=feed_dict)
                    ob, reward, done, reward_dict = env.step(np.squeeze(action))
                    # reward_dist is negative distance, so negate it back.
                    dist = -reward_dict['reward_dist']
                    # Only the last REACH_SUCCESS_TIME_RANGE steps count
                    # toward the success check.
                    if t >= T - REACH_SUCCESS_TIME_RANGE:
                        dists.append(dist)
                # Success if the arm got within threshold at any point in
                # the final time window.
                if np.amin(dists) <= REACH_SUCCESS_THRESH:
                    successes.append(1.)
                else:
                    successes.append(0.)
                if record_gifs:
                    video = np.array(Os)
                    record_gif_path = os.path.join(gifs_dir, 'cond%d.samp0.gif' % j)
                    print 'Saving gif sample to :%s' % record_gif_path
                    imageio.mimwrite(record_gif_path, video)
            env.render(close=True)
            # Advance the env to the next condition unless this was the
            # very last (condition, task) pair.
            if j != REACH_DEMO_CONDITIONS - 1 or i != len(selected_demo['selected_demoX']) - 1:
                env.env.next()
                env.render()
                time.sleep(0.5)
        if i % 5  == 0:
            print "Task %d: current success rate is %.5f" % (i, np.mean(successes))
    success_rate_msg = "Final success rate is %.5f" % (np.mean(successes))
    print success_rate_msg
    with open('logs/log_sim_vision_reach.txt', 'a') as f:
        f.write(exp_string + ':\n')
        f.write(success_rate_msg + '\n')
        
Beispiel #7
0
def control_robot(graph, model, data_generator, sess, exp_string, log_dir):
    """Drive a real HSRB robot with the trained policy for each selected
    demo task (reaching or pushing, per FLAGS.experiment).

    Python 2 / ROS code with hardware side effects; success is judged by
    distance thresholds over the final SUCCESS_TIME_RANGE steps.
    """
    REACH_SUCCESS_THRESH = 0.05
    PUSH_SUCCESS_THRESH = 0.13
    SUCCESS_TIME_RANGE = 10
    robot = HSRBMover()

    # For pushing, pre-position the arm to the right of the detected
    # object before the rollout starts.
    if FLAGS.experiment == 'pushing':
        obj = robot.detect2()
        robot.approach_right(obj)
        rospy.sleep(2)


    T = model.T
    # Normalization constants matching the chosen experiment.
    scale, bias = load_scale_and_bias('data/scale_and_bias_%s.pkl' % FLAGS.experiment)
    successes = []
    # NOTE(review): dists is shared across all tasks i (never reset per
    # task), so np.amin(dists) below sees distances from earlier tasks
    # too — confirm whether a per-task reset was intended.
    dists = []
    selected_demo = data_generator.selected_demo
    for i in xrange(len(selected_demo['selected_demoX'])):
        selected_demoO = selected_demo['selected_demoO'][i]
        selected_demoX = selected_demo['selected_demoX'][i]
        selected_demoU = selected_demo['selected_demoU'][i]
        obj = robot.detect()
        for t in range(T):
            rospy.sleep(0.05)
            obs, state, speed = robot.return_observation(FLAGS.image_topic, FLAGS.end_effector_frame, obj)
            # HWC image -> normalized, flattened (1,1,-1) network input
            # (note the [2,1,0] transpose).
            obs = np.transpose(obs, [2, 1, 0]) / 255.0
            obs = obs.reshape(1, 1, -1)
            state[0] = np.array(state[0])
            state[1] = np.array(state[1])
            state = np.array(state)
            state = state.reshape(1, 1, -1)

            # Debug dump: demo states vs. current live state.
            print selected_demoX
            print "---"
            print state

            feed_dict = {
                model.obsa: selected_demoO,
                model.statea: selected_demoX.dot(scale) + bias,
                model.actiona: selected_demoU,
                model.obsb: obs,
                model.stateb: state.dot(scale) + bias
             }
            with graph.as_default():
                print model.test_act_op
                action = sess.run(model.test_act_op, feed_dict=feed_dict)

            if FLAGS.experiment == 'reaching':
                robot.reach(action)
            else:
                robot.push_left_feedback2(action)
            _, after_state, _ = robot.return_observation(FLAGS.image_topic, FLAGS.end_effector_frame, obj)
            #ob, reward, done, reward_dict = env.step(np.squeeze(action))
            # Euclidean distance of the end effector from the target frame.
            dist = (after_state[0]**2 + after_state[1]**2 + after_state[2]**2) **(.5)
            print "Distance is %s" % dist
            if t >= T - SUCCESS_TIME_RANGE:
                dists.append(dist)
            # Pushing success: object moved past the threshold; open the
            # gripper once it has. NOTE(review): this can append multiple
            # successes per task — verify intended.
            if dist >= PUSH_SUCCESS_THRESH and FLAGS.experiment == 'pushing':
                successes.append(1.)
                robot.gripper.set_distance(0.1)
        # Reaching success: got within threshold during the final window.
        if np.amin(dists) <= REACH_SUCCESS_THRESH and FLAGS.experiment == 'reaching':
            successes.append(1.)
        else:
            successes.append(0.)
Beispiel #8
0
# Script-level setup: restore the latest checkpoint, load normalization
# constants, and reset the environment before the rollout loop below.
#assert exp_string == 'vision_reach.xavier_init.3_conv.3_strides.40_filters.4_fc.200_dim.bt_dim_10.mbs_5.ubs_1.numstep_1.updatelr_0.01.clip_20.fp'
assert exp_string == 'mean_loss'
#Load trained model
with graph.as_default():
    # Set up saver.
    saver = tf.train.Saver(max_to_keep=2)
    # Initialize variables.
    init_op = tf.global_variables_initializer()
    sess.run(init_op, feed_dict=None)
print(log_dir)
model_file = tf.train.latest_checkpoint(log_dir)  #Load Latest model
print("Restoring model weights from " + model_file)
with graph.as_default():
    saver.restore(sess, model_file)
print('done')
# State normalization learned at training time (absolute path —
# machine-specific).
scale, bias = load_scale_and_bias(
    '/home/teju/code/python/mil/data/scale_and_bias_vision_reach.pkl')
env.env.reset_env()
mode = "rgb_array"
observation = env.reset()

# Slice the flat observation vector into its named components.
# NOTE(review): layout (2 angles, 2 velocities, 2 torques, ee pose)
# inferred from the slicing only — confirm against the env definition.
joint_angles = observation[0:2]
joint_velocities = observation[2:4]
joint_torques = observation[4:6]
ee_pose = observation[6:]
# Only joint angles are fed to the network, shaped (1, 1, -1).
state = np.squeeze(joint_angles)
state = state.reshape(1, 1, -1)
img_seq = []
rgb_img = env.render()
print(state)
for t in range(env.env.T * 10):
    print("Executing Time step: {}".format(t + 1))