def sim_mode(self, emb_model_path, ctr_model_path, state_path):
    """Put the agent in simulation mode.

    Loads the embedding and control networks onto ``self.device`` and the
    pickled (scale, bias) state-normalization pair from ``state_path``,
    storing the latter as float32 torch tensors on the same device.
    """
    dev = self.device
    self.load_emb_net(emb_model_path, dev)
    self.load_ctr_net(ctr_model_path, dev)
    raw_scale, raw_bias = load_scale_and_bias(state_path)
    # Convert the (presumably numpy/list) scale and bias to float32 tensors
    # on the target device so they can normalize states during rollout.
    self.scale = torch.from_numpy(np.asarray(raw_scale, dtype=np.float32)).to(dev)
    self.bias = torch.from_numpy(np.asarray(raw_bias, dtype=np.float32)).to(dev)
def __init__(self, model, scale_bias_file, sess):
    """Wrap a trained model with its TF session and optional normalization.

    Args:
        model: policy/model object whose ops are run in ``sess``.
        scale_bias_file: path to a pickled (scale, bias) pair used to
            normalize states, or a falsy value to disable normalization.
        sess: the TensorFlow session the model lives in.
    """
    self.sess = sess
    self.model = model
    if scale_bias_file:
        self.scale, self.bias = load_scale_and_bias(scale_bias_file)
    else:
        # Fix: the original set only self.scale here, leaving self.bias
        # undefined and raising AttributeError on first access.
        self.scale = None
        self.bias = None
def __init__(self, model, scale_bias_file, sess):
    """Wrap a trained model with zero-demo placeholders and normalization.

    Args:
        model: policy/model object whose ops are run in ``sess``.
        scale_bias_file: path to a pickled (scale, bias) pair used to
            normalize states, or a falsy value to disable normalization.
        sess: the TensorFlow session the model lives in.
    """
    self.sess = sess
    self.model = model
    # All-zero demonstration placeholders: 1 task x 100 timesteps,
    # 20-dim states and 7-dim actions (shapes taken from the literals;
    # their exact semantics should be confirmed against the model's feeds).
    self.zero_demoX = np.zeros([1, 100, 20])
    self.zero_demoU = np.zeros([1, 100, 7])
    if scale_bias_file:
        self.scale, self.bias = load_scale_and_bias(scale_bias_file)
    else:
        # Fix: the original set only self.scale here, leaving self.bias
        # undefined and raising AttributeError on first access.
        self.scale = None
        self.bias = None
def __init__(self, model, scale_bias_file, sess, graph):
    """Wrap a trained model with a Reptile meta-learner and normalization.

    Args:
        model: policy/model object whose ops are run in ``sess``.
        scale_bias_file: path to a pickled (scale, bias) pair used to
            normalize states, or a falsy value to disable normalization.
        sess: the TensorFlow session the model lives in.
        graph: the tf.Graph holding the model's trainable variables.
    """
    self.sess = sess
    self.model = model
    with graph.as_default():
        variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='model')
        # Keep only weight/bias variables ('w' or 'b' in the name).
        variables = [v for v in variables if ('b' in v.name or 'w' in v.name)]
        # TODO: try transductive and non-transductive options
        self.reptile = Reptile(sess, graph, variables=variables,
                               transductive=False, pre_step_op=None)
    if scale_bias_file:
        self.scale, self.bias = load_scale_and_bias(scale_bias_file)
    else:
        # Fix: the original set only self.scale here, leaving self.bias
        # undefined and raising AttributeError on first access.
        self.scale = None
        self.bias = None
def __init__(self, demo_dir="../mil/data/sim_push/", state_path=None,
             num_batch_tasks=1, train_n_shot=1, test_n_shot=1,
             valid=False, val_size=None):
    """Data generator for sim-push demonstrations with a train/valid split.

    Args:
        demo_dir: directory containing per-task ``object_*`` gif dirs and
            ``*.pkl`` demo files (plus an optional prebuilt ``cache``).
        state_path: path to a pickled (scale, bias) pair for state
            normalization. NOTE(review): defaults to None but is passed
            unconditionally to load_scale_and_bias below — confirm callers
            always supply it.
        num_batch_tasks: tasks per meta-batch; n_tasks is truncated to a
            multiple of this.
        train_n_shot / test_n_shot: shots per task for train/test splits.
        valid: if True (and val_size is set), expose the validation tasks
            instead of the training tasks.
        val_size: when truthy, triggers the train/valid split (its numeric
            value is currently ignored — see commented-out expression).
    """
    self.demo_dir = demo_dir
    self.train_n_shot = train_n_shot
    self.test_n_shot = test_n_shot
    self.valid = valid
    self.val_size = val_size
    # Build the demo cache once; a "cache" subdirectory marks it as done.
    if os.path.exists(os.path.join(demo_dir, "cache")):
        print("cache exists")
    else:
        make_cache(demo_dir)
    # gif & pkl for all tasks
    gif_dirs = natsorted(glob.glob(os.path.join(demo_dir, "object_*")))
    pkl_files = natsorted(glob.glob(os.path.join(demo_dir, "*.pkl")))
    # Truncate so the task count divides evenly into meta-batches.
    self.n_tasks = len(gif_dirs) - len(gif_dirs) % num_batch_tasks
    print("n_tasks:", self.n_tasks)
    gif_dirs = gif_dirs[:self.n_tasks]
    pkl_files = pkl_files[:self.n_tasks]
    if val_size:
        # Validation set is one meta-batch of tasks; the proportional
        # split below was disabled at some point.
        self.n_valid = num_batch_tasks  # int(self.n_tasks*val_size)
        self.n_train = self.n_tasks - self.n_valid
        if not valid:
            self.n_tasks = self.n_train
        else:
            self.n_tasks = self.n_valid
    # State normalization as float32 torch tensors (CPU; no device move here).
    self.scale, self.bias = load_scale_and_bias(state_path)
    self.scale = torch.from_numpy(np.array(self.scale, np.float32))
    self.bias = torch.from_numpy(np.array(self.bias, np.float32))
def evaluate_vision_reach(env, graph, model, data_generator, sess, exp_string, record_gifs, log_dir):
    """Evaluate a vision-based reaching policy in simulation.

    For each selected demo task and each demo condition, rolls the policy
    out for T steps, feeding the demo observations/states/actions plus the
    current (normalized) observation into the model and executing the
    predicted action. A trial counts as a success if the minimum reach
    distance over the last REACH_SUCCESS_TIME_RANGE steps is within
    REACH_SUCCESS_THRESH. Optionally records per-condition gifs, and
    appends the final success rate to logs/log_sim_vision_reach.txt.
    (Legacy Python 2 code: xrange / print statements.)
    """
    T = model.T
    scale, bias = load_scale_and_bias('data/scale_and_bias_sim_vision_reach.pkl')
    successes = []
    selected_demo = data_generator.selected_demo
    if record_gifs:
        record_gifs_dir = os.path.join(log_dir, 'evaluated_gifs')
        mkdir_p(record_gifs_dir)
    for i in xrange(len(selected_demo['selected_demoX'])):
        # Conditioning demo for task i (observations, states, actions).
        selected_demoO = selected_demo['selected_demoO'][i]
        selected_demoX = selected_demo['selected_demoX'][i]
        selected_demoU = selected_demo['selected_demoU'][i]
        if record_gifs:
            gifs_dir = os.path.join(record_gifs_dir, 'color_%d' % i)
            mkdir_p(gifs_dir)
        for j in xrange(REACH_DEMO_CONDITIONS):
            # Only evaluate conditions this task actually has demos for.
            if j in data_generator.demos[i]['demoConditions']:
                dists = []
                # ob = env.reset()
                # use env.set_state here to arrange blocks
                Os = []
                for t in range(T):
                    # import pdb; pdb.set_trace()
                    env.render()
                    time.sleep(0.05)
                    obs, state = env.env.get_current_image_obs()
                    Os.append(obs)
                    # HWC uint8 image -> transposed, scaled to [0, 1],
                    # flattened to the (1, 1, -1) layout the model expects.
                    obs = np.transpose(obs, [2, 1, 0]) / 255.0
                    obs = obs.reshape(1, 1, -1)
                    state = state.reshape(1, 1, -1)
                    # States are normalized as state.dot(scale) + bias on
                    # both the demo and current sides.
                    feed_dict = {
                        model.obsa: selected_demoO,
                        model.statea: selected_demoX.dot(scale) + bias,
                        model.actiona: selected_demoU,
                        model.obsb: obs,
                        model.stateb: state.dot(scale) + bias
                    }
                    with graph.as_default():
                        action = sess.run(model.test_act_op, feed_dict=feed_dict)
                    ob, reward, done, reward_dict = env.step(np.squeeze(action))
                    # reward_dist is negative distance; negate to get distance.
                    dist = -reward_dict['reward_dist']
                    # Only the tail of the rollout counts toward success.
                    if t >= T - REACH_SUCCESS_TIME_RANGE:
                        dists.append(dist)
                if np.amin(dists) <= REACH_SUCCESS_THRESH:
                    successes.append(1.)
                else:
                    successes.append(0.)
                if record_gifs:
                    video = np.array(Os)
                    record_gif_path = os.path.join(gifs_dir, 'cond%d.samp0.gif' % j)
                    print 'Saving gif sample to :%s' % record_gif_path
                    imageio.mimwrite(record_gif_path, video)
            env.render(close=True)
            # Advance to the next condition unless this was the very last
            # condition of the very last task.
            if j != REACH_DEMO_CONDITIONS - 1 or i != len(selected_demo['selected_demoX']) - 1:
                env.env.next()
                env.render()
                time.sleep(0.5)
        if i % 5 == 0:
            print "Task %d: current success rate is %.5f" % (i, np.mean(successes))
    success_rate_msg = "Final success rate is %.5f" % (np.mean(successes))
    print success_rate_msg
    with open('logs/log_sim_vision_reach.txt', 'a') as f:
        f.write(exp_string + ':\n')
        f.write(success_rate_msg + '\n')
def control_robot(graph, model, data_generator, sess, exp_string, log_dir):
    """Run the learned policy on a real HSRB robot (reaching or pushing).

    For each selected demo task, rolls the policy out for T steps on the
    robot, feeding demo observations/states/actions plus the current
    (normalized) camera/state observation, and executing the predicted
    action via reach or push primitives. Success is judged from the
    end-effector distance over the last SUCCESS_TIME_RANGE steps.
    (Legacy Python 2 code; indentation below is reconstructed from a
    whitespace-mangled source — the nesting of the success checks should
    be confirmed against the original file.)
    """
    REACH_SUCCESS_THRESH = 0.05
    PUSH_SUCCESS_THRESH = 0.13
    SUCCESS_TIME_RANGE = 10
    robot = HSRBMover()
    if FLAGS.experiment == 'pushing':
        # Pre-position next to the detected object before pushing.
        obj = robot.detect2()
        robot.approach_right(obj)
        rospy.sleep(2)
    T = model.T
    scale, bias = load_scale_and_bias('data/scale_and_bias_%s.pkl' % FLAGS.experiment)
    successes = []
    dists = []
    selected_demo = data_generator.selected_demo
    for i in xrange(len(selected_demo['selected_demoX'])):
        # Conditioning demo for task i (observations, states, actions).
        selected_demoO = selected_demo['selected_demoO'][i]
        selected_demoX = selected_demo['selected_demoX'][i]
        selected_demoU = selected_demo['selected_demoU'][i]
        obj = robot.detect()
        for t in range(T):
            rospy.sleep(0.05)
            obs, state, speed = robot.return_observation(FLAGS.image_topic, FLAGS.end_effector_frame, obj)
            # HWC image -> transposed, scaled to [0, 1], flattened to the
            # (1, 1, -1) layout the model expects.
            obs = np.transpose(obs, [2, 1, 0]) / 255.0
            obs = obs.reshape(1, 1, -1)
            state[0] = np.array(state[0])
            state[1] = np.array(state[1])
            state = np.array(state)
            state = state.reshape(1, 1, -1)
            print selected_demoX
            print "---"
            print state
            # States are normalized as state.dot(scale) + bias on both the
            # demo and current sides.
            feed_dict = {
                model.obsa: selected_demoO,
                model.statea: selected_demoX.dot(scale) + bias,
                model.actiona: selected_demoU,
                model.obsb: obs,
                model.stateb: state.dot(scale) + bias
            }
            with graph.as_default():
                print model.test_act_op
                action = sess.run(model.test_act_op, feed_dict=feed_dict)
            if FLAGS.experiment == 'reaching':
                robot.reach(action)
            else:
                robot.push_left_feedback2(action)
            _, after_state, _ = robot.return_observation(FLAGS.image_topic, FLAGS.end_effector_frame, obj)
            #ob, reward, done, reward_dict = env.step(np.squeeze(action))
            # Euclidean norm of the post-action end-effector state
            # (presumably distance from the target frame — confirm).
            dist = (after_state[0]**2 + after_state[1]**2 + after_state[2]**2) **(.5)
            print "Distance is %s" % dist
            # Only the tail of the rollout counts toward success.
            if t >= T - SUCCESS_TIME_RANGE:
                dists.append(dist)
            # Pushing success is checked every step: once the object has
            # moved far enough, count a success and open the gripper.
            if dist >= PUSH_SUCCESS_THRESH and FLAGS.experiment == 'pushing':
                successes.append(1.)
                robot.gripper.set_distance(0.1)
        # NOTE(review): this post-rollout check only awards reaching
        # successes; for pushing it always falls to the else branch, so a
        # pushing trial that already appended 1. above also records a 0.
        # here. That looks like a scoring bug — confirm intended behavior.
        if np.amin(dists) <= REACH_SUCCESS_THRESH and FLAGS.experiment == 'reaching':
            successes.append(1.)
        else:
            successes.append(0.)
#assert exp_string == 'vision_reach.xavier_init.3_conv.3_strides.40_filters.4_fc.200_dim.bt_dim_10.mbs_5.ubs_1.numstep_1.updatelr_0.01.clip_20.fp' assert exp_string == 'mean_loss' #Load trained model with graph.as_default(): # Set up saver. saver = tf.train.Saver(max_to_keep=2) # Initialize variables. init_op = tf.global_variables_initializer() sess.run(init_op, feed_dict=None) print(log_dir) model_file = tf.train.latest_checkpoint(log_dir) #Load Latest model print("Restoring model weights from " + model_file) with graph.as_default(): saver.restore(sess, model_file) print('done') scale, bias = load_scale_and_bias( '/home/teju/code/python/mil/data/scale_and_bias_vision_reach.pkl') env.env.reset_env() mode = "rgb_array" observation = env.reset() joint_angles = observation[0:2] joint_velocities = observation[2:4] joint_torques = observation[4:6] ee_pose = observation[6:] state = np.squeeze(joint_angles) state = state.reshape(1, 1, -1) img_seq = [] rgb_img = env.render() print(state) for t in range(env.env.T * 10): print("Executing Time step: {}".format(t + 1))