Example #1
def learning_method(self,
                    gamma=0.9,
                    alpha=0.1,
                    epsilon=1e-5,
                    display=False,
                    lambda_=None):  # lambda_ is unused here; kept for a uniform signature
    """Run one Q-learning episode; return (steps taken, total reward)."""
    self.state = self.env.reset()
    s0 = self.state
    if display:
        self.env.render()
    time_in_episode, total_reward = 0, 0
    is_done = False
    self.policy = greedy_policy  # target policy is greedy; behavior stays epsilon-greedy
    while not is_done:
        # Act with the epsilon-greedy behavior policy.
        a0 = self.perform_policy(s0, epsilon)
        s1, r1, is_done, info, total_reward = self.act(a0)
        if display:
            self.env.render()
        # Bootstrap from the greedy action in s1 (off-policy TD target).
        a1 = greedy_policy(self.A, s1, self.Q)
        old_q = get_dict(self.Q, s0, a0)
        q_prime = get_dict(self.Q, s1, a1)
        td_target = r1 + gamma * q_prime
        new_q = old_q + alpha * (td_target - old_q)
        set_dict(self.Q, new_q, s0, a0)
        # Q-learning carries only the state forward; SARSA would also carry a1.
        s0 = s1
        time_in_episode += 1
    if display:
        print(self.experience.last_episode)
    return time_in_episode, total_reward
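The method above relies on three helpers that are not shown: get_dict, set_dict, and greedy_policy. A minimal sketch of what they might look like, assuming Q is a flat dict keyed by state-action strings (the actual codebase may store Q differently):

def get_dict(target_dict, s, a):
    # Look up Q(s, a); unseen pairs default to 0 (assumed key scheme).
    return target_dict.get("{}_{}".format(s, a), 0.0)

def set_dict(target_dict, value, s, a):
    # Store Q(s, a) under the same assumed key scheme.
    target_dict["{}_{}".format(s, a)] = value

def greedy_policy(A, s, Q):
    # Return the action in A with the highest estimated Q(s, a).
    return max(A, key=lambda a: get_dict(Q, s, a))

With these definitions, the update in the loop is the standard tabular rule Q(s0, a0) += alpha * (r1 + gamma * max_a Q(s1, a) - Q(s0, a0)).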
Example #2
import time

import cv2
import numpy as np
import torch

# Trainer, utils, and _get_pc are assumed to come from the surrounding repository.

def prediction_process(args, action_queue, experience_queue, work, ready, can_predict, should_reset, iteration, path_queue):
    # Set up the model
    ts = time.time()
    first = True
    reward = 5.0
    discount_factor = 0.5
    path = path_queue.get()
    image_path, depth_path, pc_path, vis_path, mixed_paths, feat_paths = path
    trainer = Trainer(reward, discount_factor, False, args.primitive_lr, args.densenet_lr)
    trainer.behavior_net.load_state_dict(torch.load(args.model))
    trainer.target_net.load_state_dict(trainer.behavior_net.state_dict())
    ready.value = True
    cv2.namedWindow("prediction")
    print("[Prediction Thread] Loading model took %f seconds. Starting prediction thread" % (time.time() - ts))
    while work.value:
        if should_reset.value:
            print("[Prediction Thread] Received reset command")
            if first:
                print("[Prediction Thread] Already in initial state, aborting reset request...")
                should_reset.value = False
                ready.value = True
                continue
            ts = time.time()
            ready.value = False
            trainer.behavior_net.load_state_dict(torch.load(args.model))
            first = True
            path = path_queue.get()
            image_path, depth_path, pc_path, vis_path, mixed_paths, feat_paths = path
            print("[Prediction Thread] Reset complete! Took {} seconds".format(time.time() - ts))
            should_reset.value = False
            ready.value = True
            continue
        if not first:
            # Busy-wait until an experience arrives, a reset is requested, or work stops.
            while experience_queue.empty() and not should_reset.value and work.value:
                pass
        if not experience_queue.empty():
            print("[Prediction Thread] Got experience, updating network...")
            transition = experience_queue.get()
            color = cv2.imread(transition.color)
            depth = np.load(transition.depth)
            next_color = cv2.imread(transition.next_color)
            next_depth = np.load(transition.next_depth)
            pixel_index = transition.pixel_idx
            # Compute the TD target, then take one gradient step on the behavior network.
            td_target = trainer.get_label_value(transition.reward, next_color, next_depth, transition.is_empty, pixel_index[0])
            trainer.backprop(color, depth, pixel_index, td_target, 1.0, 1, True, True)
        if can_predict.value:
            if first:
                first = False
            print("[Prediction Thread] Start prediction")
            pc_response = _get_pc(iteration.value, True, pc_path)
            color, depth, points = utils.get_heightmap(pc_response.pc, image_path, depth_path, iteration.value)
            suck_1_prediction, suck_2_prediction, grasp_prediction = trainer.forward(color, depth, is_volatile=True)
            heatmaps, mixed_imgs = utils.save_heatmap_and_mixed(suck_1_prediction, suck_2_prediction, grasp_prediction, feat_paths, mixed_paths, color, iteration.value)
            action, action_str, pixel_index, angle = utils.greedy_policy(suck_1_prediction, suck_2_prediction, grasp_prediction)
            visual_img = utils.draw_image(mixed_imgs[pixel_index[0]], False, pixel_index, vis_path + "vis_{:06}.jpg".format(iteration.value))
            cv2.imshow("prediction", cv2.resize(visual_img, None, fx=2, fy=2))
            cv2.waitKey(33)
            utils.print_action(action_str, pixel_index, points[pixel_index[1], pixel_index[2]])
            action_queue.put([action, action_str, points[pixel_index[1], pixel_index[2]], angle, pixel_index])
            can_predict.value = False
    print("[Prediction Thread] Prediction thread stopped")
Example #3
            grasp_name = mixed_path + "grasp_{:06}_idx_{}.jpg".format(iteration, rotate_idx)
            cv2.imwrite(grasp_name, grasp_mixed_idx)
        print("[{:.6f}]: suck max: \033[0;34m{}\033[0m | grasp max: \033[0;35m{}\033[0m".format(
            time.time(), np.max(suck_predictions), np.max(grasp_predictions)))
        explore = -1  # -1 means "no exploration decision made" (e.g. testing mode)
        # Policy decider
        if not testing:  # Train
            if not grasp_only:
                explore, action, action_str, pixel_index, angle = \
                    utils.epsilon_greedy_policy(epsilon_, suck_predictions, grasp_predictions)
            else:
                explore, action, action_str, pixel_index, angle = \
                    utils.grasp_epsilon_greedy_policy(epsilon_, grasp_predictions)
        if testing:  # Test
            if not grasp_only:
                action, action_str, pixel_index, angle = utils.greedy_policy(suck_predictions, grasp_predictions)
            else:  # Grasp-only
                action = 0
                action_str = 'grasp'
                pixel_index, angle = utils.grasp_only_policy(grasp_predictions)
        explore_list.append(explore)
        if explore == 1:
            print("Exploring...")
        del suck_predictions, grasp_predictions, state_feat
        print("[%f]: Take action: \033[0;31m %s\033[0m at \033[0;32m(%d, %d)\033[0m with theta \033[0;33m%f \033[0m"
              % (time.time(), action_str, pixel_index[1], pixel_index[2], angle))
        # Draw color + heatmap + motion
        visual_img = None
        if action:  # SUCK
            visual_img = utils.draw_image(suck_mixed, action, pixel_index)
        else:  # GRASP
            # Assumed mirror of the suck branch (grasp_mixed as the grasp counterpart of suck_mixed).
            visual_img = utils.draw_image(grasp_mixed, action, pixel_index)
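The utils.epsilon_greedy_policy call above is not shown. A minimal sketch of the pattern it implements, assuming the prediction maps are NumPy arrays of shape (num_rotations, H, W) and that action 1 means suck and 0 means grasp; the repo's actual encoding and angle discretization may differ:

import numpy as np

def epsilon_greedy_sketch(epsilon, suck_predictions, grasp_predictions):
    # Explore with probability epsilon: random primitive and random pixel.
    explore = 1 if np.random.uniform() < epsilon else 0
    if explore:
        action = np.random.randint(2)
        prediction = suck_predictions if action else grasp_predictions
        pixel_index = [np.random.randint(s) for s in prediction.shape]
    else:
        # Exploit: pick the primitive whose map has the higher peak, then its argmax pixel.
        action = 1 if np.max(suck_predictions) > np.max(grasp_predictions) else 0
        prediction = suck_predictions if action else grasp_predictions
        pixel_index = list(np.unravel_index(np.argmax(prediction), prediction.shape))
    action_str = 'suck' if action else 'grasp'
    # Assumed discretization: the first index selects one of num_rotations angles.
    angle = np.deg2rad(pixel_index[0] * (360.0 / prediction.shape[0]))
    return explore, action, action_str, pixel_index, angle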
Example #4
 print "Forward past: {} seconds".format(time.time() - ts)
 heatmaps, mixed_imgs = utils.save_heatmap_and_mixed(
     suck_1_prediction, suck_2_prediction, grasp_prediction,
     feat_paths, mixed_paths, color, iteration)
 # Standarize predictions to avoid bias between them
 #suck_1_prediction = utils.standarization(suck_1_prediction);suck_2_prediction = utils.standarization(suck_2_prediction)
 #grasp_prediction = utils.standarization(grasp_prediction)
 # SELECT ACTION
 if not testing:  # Train
     explore, action, action_str, pixel_index, angle = utils.epsilon_greedy_policy(
         epsilon_, suck_1_prediction, suck_2_prediction,
         grasp_prediction, depth, diff_path, iteration,
         specific_tool)
 else:  # Testing
     action, action_str, pixel_index, angle = utils.greedy_policy(
         suck_1_prediction, suck_2_prediction, grasp_prediction,
         specific_tool)
     explore = False
 explore_list.append(explore)
 target_list.append(pixel_index)
 position_list.append(points[pixel_index[1], pixel_index[2]])
 del suck_1_prediction, suck_2_prediction, grasp_prediction
 utils.print_action(action_str, pixel_index,
                    points[pixel_index[1], pixel_index[2]])
 # Save (color heightmap + prediction heatmap + motion primitive and corresponding position), then show it
 visual_img = utils.draw_image(
     mixed_imgs[pixel_index[0]], explore, pixel_index,
     vis_path + "vis_{:06}.jpg".format(iteration))
 cv2.imshow("prediction",
            cv2.resize(visual_img, None, fx=2, fy=2))
 cv2.waitKey(33)
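The greedy test-time branch reduces to comparing the peak score of each primitive's map and taking that map's argmax pixel. A minimal sketch under the same (num_rotations, H, W) array assumption; the real utils.greedy_policy also takes specific_tool, which is omitted here, and the integer action encoding is assumed:

import numpy as np

def greedy_policy_sketch(suck_1_prediction, suck_2_prediction, grasp_prediction):
    maps = [('suck_1', suck_1_prediction), ('suck_2', suck_2_prediction),
            ('grasp', grasp_prediction)]
    # Pick the primitive whose map has the highest peak value.
    # action is the map's index in the list (assumed encoding).
    action, (action_str, prediction) = max(enumerate(maps),
                                           key=lambda kv: np.max(kv[1][1]))
    pixel_index = list(np.unravel_index(np.argmax(prediction), prediction.shape))
    # Assumed discretization: the first index selects one of num_rotations angles.
    angle = np.deg2rad(pixel_index[0] * (360.0 / prediction.shape[0]))
    return action, action_str, pixel_index, angle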