class AgentPolicyGradient:
    """Agent facade that forwards every call to a wrapped ``PolicyGradient``.

    The wrapped learner is exposed as ``self.PG``; this class adds two
    reward helpers (``crashed`` and ``episode_reward``) on top of it.
    """

    def __init__(self, n_x, n_y, learning_rate = 0.02, reward_decay=0.99, load_path=None, save_path=None):
        """Build the underlying learner, passing every argument straight through."""
        self.PG = PolicyGradient(
            n_x,
            n_y,
            learning_rate=learning_rate,
            reward_decay=reward_decay,
            load_path=load_path,
            save_path=save_path,
        )

    def choose_action(self, observation):
        """Return the action the wrapped learner picks for ``observation``."""
        return self.PG.choose_action(observation)

    def store_transition(self, s, a, r):
        """Hand one (state, action, reward) triple to the wrapped learner."""
        return self.PG.store_transition(s, a, r)

    def learn(self):
        """Run the wrapped learner's training step and return its result."""
        return self.PG.learn()

    def plot_cost(self):
        """Plot the wrapped learner's ``cost_history`` in a TkAgg window."""
        # The backend is selected before pyplot is imported, as in the
        # original ordering.
        import matplotlib
        matplotlib.use('TkAgg')
        import matplotlib.pyplot as plt

        plt.plot(np.arange(len(self.PG.cost_history)), self.PG.cost_history)
        plt.ylabel('Cost Ex')
        plt.xlabel('Training Steps Ex')
        plt.show()

    def crashed(self):
        """Return True when the summed episode rewards are below -250."""
        return sum(self.PG.episode_rewards) < -250

    def episode_reward(self):
        """Return the sum of the rewards stored for the current episode."""
        return sum(self.PG.episode_rewards)

    def costs(self):
        """Return whatever the wrapped learner's ``costs()`` returns."""
        return self.PG.costs()
for episode in range(EPISODES): # start nauki observation = env.reset() episode_reward = 0 while True: if RENDER_ENV: env.render() # 1. Choose an action based on observation action = PG.choose_action(observation) # 2. Take action in the environment observation_, reward, done, info = env.step(action) # 3. Store transition for training PG.store_transition(observation, action, reward) if done: episode_rewards_sum = sum(PG.episode_rewards) rewards.append(episode_rewards_sum) max_reward_so_far = np.amax(rewards) print("==========================================") print("Episode: ", episode) print("Reward: ", episode_rewards_sum) print("Max reward so far: ", max_reward_so_far) # 4. Train neural network discounted_episode_rewards_norm = PG.learn() # Renderuj gre dopiero gdy program uzyska minimalny wynik RENDER_REWARD_MIN
def simulation():
    """Train a top-K policy-gradient agent on a synthetic recommendation task.

    Observations are drawn uniformly from a fixed pool of 100 random
    3-element vectors, and the reward depends only on the chosen action
    (``action_rewards``).  An episode ends once more than 100 observations
    are stored in the learner or more than 120 seconds have elapsed.  After
    every episode the learner is trained; after all episodes the cost curve
    and the action distribution at ``PG.s0`` are plotted.

    Takes no arguments and returns nothing.
    """
    action_rewards = [10, 9, 1, 1, 1, 1, 1, 1, 1, 1]
    actions = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    observations = [[random.randint(0, i * 10) for i in range(1, 4)]
                    for j in range(1, 101)]

    # Number of items to recommend.
    K = 2

    load_version = 1
    save_version = load_version + 1
    save_path = "output/weights/topk{}.ckpt".format(save_version)

    EPISODES = 5000
    RENDER_ENV = True
    rewards = []

    PG = PolicyGradient(n_x=len(observations[0]),
                        n_y=len(actions),
                        s0=observations[random.randint(0, len(observations) - 1)],
                        learning_rate=0.005,
                        reward_decay=1,
                        load_path=None,
                        save_path=save_path,
                        weight_capping_c=2**3,
                        k=K,
                        b_distribution='uniform')

    for episode in range(EPISODES):
        # time.clock() was removed in Python 3.8; perf_counter() is the
        # documented replacement for measuring elapsed time.
        tic = time.perf_counter()

        while True:
            # TODO: initialize the env
            # NOTE(review): with RENDER_ENV fixed to True the observation is
            # re-sampled on every step, so the hand-over at the bottom of the
            # loop is always overwritten.
            if RENDER_ENV:
                observation = observations[random.randint(
                    0, len(observations) - 1)]

            # 1. Choose an action based on observation
            action = PG.choose_action(observation)

            # 2. Take action in the environment: the next observation is a
            #    fresh random draw and the reward depends only on the action.
            observation_ = observations[random.randint(
                0, len(observations) - 1)]
            reward = action_rewards[action]

            # 3. Store transition for training
            PG.store_transition(observation, action, reward)

            elapsed_sec = time.perf_counter() - tic

            # The episode ends on the time limit or the buffer-size limit.
            if elapsed_sec > 120 or len(PG.episode_observations) > 100:
                episode_rewards_sum = sum(PG.episode_rewards)
                rewards.append(episode_rewards_sum)
                max_reward_so_far = np.amax(rewards)
                # The episode return is what gets recorded in cost_history.
                PG.cost_history.append(episode_rewards_sum)

                print("==========================================")
                print("Episode: ", episode)
                print("Seconds: ", elapsed_sec)
                print("Reward: ", episode_rewards_sum)
                print("Max reward so far: ", max_reward_so_far)
                print("distribution at {} is :{}".format(
                    PG.s0, PG.get_distribution(PG.s0)))

                # 4. Train neural network
                PG.learn()
                break

            # Save new observation
            observation = observation_

    PG.plot_cost()

    plt.bar(actions, PG.get_distribution(PG.s0))
    plt.xlabel("action")
    # y-axis label
    plt.ylabel("probability")
    # chart title
    plt.title("top-k correction policy")
    plt.show()
d = () for b in range(len(action_list)): d += (spaces.Discrete(int(action_list[b])), ) action_ = spaces.Tuple(d) obs, reward_step, done, info = env.step( action_) #获取这一eposide的奖励 ay.append(reward_step + 0.5) plt.clf() # 清除之前画的图 plt.plot(ax, ay) # 画出当前 ax 列表和 ay 列表中的值的图形 plt.xlabel('step') plt.ylabel('吞吐率') plt.pause(0.1) # 暂停一秒 plt.ioff() # 关闭画图的窗口 reward = reward_step if stepIdx > 100: s, a, r = PG.store_transition(observation_step, action, reward) if stepIdx % 6 == 0 and stepIdx > 100: PG.learn() for k in range(len(observation)): ss = observation[k].copy() ss.extend(matrixOfChanAlloc.copy().reshape( 1, nOfenb * nOfchannel).tolist()[0]) # print(ss) observation_step = np.array(ss).reshape( nOfenb * nOfchannel + 4, 1).ravel() print("observation_step: ", observation_step) if observation_step[1] > 0: action = PG.choose_action1(observation_step, matrixOfChanAlloc, stepIdx) if action < 12: action_list.append(observation_step[0])
1] > 0: #判断RNTI是否大于0 是否为有效请求 action = PG.choose_action1(observation_step, matrixOfChanAlloc, observation[k][0]) #选取动作 if action < nOfchannel: #判断是否为有效动作 observation[k][4] = action #改变状态 addaction(observation[k][0], observation[k][1], action, action_list) #存储分配策略到action_list else: addaction(0, 0, 0, action_list) #空动作 reward = 0 #eposide没有结束reward为0 if stepIdx > 100 and k < numue - 1: #stepIndex大于100开始进入学习过程,开始存储信息 s, a, r = PG.store_transition( observation_step, action + observation[k][0] * nOfchannel, reward) #episode结束,传递动作到NS-3,并返回obs,reward等信息 action_tuple = listTotuple(action_list) obs, reward_step, done, info = env.step( action_tuple) #获取这一eposide的奖励 ay.append(reward_step) reward = reward_step if stepIdx > 100: #存储该episode最后一步的信息 s, a, r = PG.store_transition( observation_step, action + observation[numue - 1][0] * nOfchannel, reward)
PG = PolicyGradient(n_actions=env.action_space.n, n_features=env.observation_space.shape[0], lr=0.02, gamma=0.99, output_graph=FLAGS.output_graph) for i in range(FLAGS.episode): s = env.reset() while True: if RENDER: env.render() action = PG.choose_action(s) s_, r, done, info = env.step(action) PG.store_transition(s_, action, r) if done: episode_rs_sum = sum(PG.ep_rs) if 'running_reward' not in globals(): running_reward = episode_rs_sum else: running_reward = running_reward * 0.99 + episode_rs_sum * 0.01 if running_reward > FLAGS.display_threshold: RENDER = True print('episode:', i, ' reward:', running_reward) norm_reward = PG.learn() if i == 30: plt.plot(norm_reward) plt.xlabel('episode steps')
def simulation():
    """Train a top-K policy-gradient agent on a two-state, six-action toy MDP.

    Rewards and next states come from two lookup tables keyed by
    ``str(sum(observation)) + str(action_id)``.  An episode ends once more
    than 100 observations are stored in the learner or more than 120
    seconds have elapsed.  After every episode the action distribution at
    both states is printed and the learner is trained; after all episodes
    the cost curve and both final distributions are plotted.

    Takes no arguments and returns nothing.
    """
    # Earlier configuration (three states, four actions), kept for reference:
    # action_rewards = {'11':4,'12':1,'13':1,'14':1,'21':1,'22':2,'23':3,'24':16,'31':1,'32':2,'33':3,'34':4}
    # observation_action_transfer = {'11':[2],'12':[2],'13':[2],'14':[2],'21':[3],'22':[3],'23':[3],'24':[3],
    #                                '31':[1],'32':[1],'33':[3],'34':[3]}
    # actions = [1,2,3,4]
    # observations = [[1],[2],[3]]
    action_rewards = {'11': 5, '12': 0, '13': 0, '14': 0, '15': 0, '16': 13,
                      '21': 10, '22': 0, '23': 0, '24': 0, '25': 0, '26': 8}
    observation_action_transfer = {
        '11': [1, 1], '12': [1, 1], '13': [1, 1],
        '14': [1, 1], '15': [1, 1], '16': [1, 1],
        '21': [1, 1], '22': [1, 1], '23': [1, 1],
        '24': [1, 1], '25': [1, 1], '26': [0, 1],
    }
    actions = [1, 2, 3, 4, 5, 6]
    observations = [[0, 1], [1, 1]]

    # Number of items to recommend.
    K = 2

    load_version = 4
    save_version = load_version + 1
    save_path = "output/weights/topk{}.ckpt".format(save_version)

    EPISODES = 3000
    RENDER_ENV = True
    rewards = []

    PG = PolicyGradient(n_x=len(observations[0]),
                        n_y=len(actions),
                        s0=observations[-1],
                        learning_rate=0.001,
                        reward_decay=1,
                        load_path=None,
                        save_path=save_path,
                        weight_capping_c=2**3,
                        k=K,
                        b_distribution='uniform')

    for episode in range(EPISODES):
        # time.clock() was removed in Python 3.8; perf_counter() is the
        # documented replacement for measuring elapsed time.
        tic = time.perf_counter()

        while True:
            # TODO: initialize the env
            # The current state is read back from the learner's buffer.
            # NOTE(review): this presumably relies on PolicyGradient seeding
            # episode_observations with s0 - confirm against its definition.
            if RENDER_ENV:
                observation = PG.episode_observations[-1]

            # 1. Choose an action based on observation
            action = PG.choose_action(observation)

            # 2. Take action in the environment; one key serves both tables.
            key = str(sum(observation)) + str(actions[action])
            observation_ = observation_action_transfer[key]
            reward = action_rewards[key]

            # 3. Store transition for training (the *next* state is stored,
            #    which is what the buffer read-back above picks up).
            PG.store_transition(observation_, action, reward)

            elapsed_sec = time.perf_counter() - tic

            # The episode ends on the time limit or the buffer-size limit.
            if elapsed_sec > 120 or len(PG.episode_observations) > 100:
                episode_rewards_sum = sum(PG.episode_rewards)
                rewards.append(episode_rewards_sum)
                max_reward_so_far = np.amax(rewards)
                # The episode return is what gets recorded in cost_history.
                PG.cost_history.append(episode_rewards_sum)

                print("==========================================")
                print("Episode: ", episode)
                print("Seconds: ", elapsed_sec)
                print("Reward: ", episode_rewards_sum)
                print("Max reward so far: ", max_reward_so_far)

                # 4. Train neural network
                print("distribution at {} is :{}".format(
                    observations[0], PG.get_distribution(observations[0])))
                print("distribution at {} is :{}".format(
                    observations[1], PG.get_distribution(observations[1])))
                PG.learn()
                break

            # Save new observation
            observation = observation_

    PG.plot_cost()

    plt.bar(actions, PG.get_distribution(observations[0]))
    plt.xlabel("action at state[0,1]")
    # y-axis label
    plt.ylabel("probability")
    # chart title
    plt.title("policy distribution at state[0,1]")
    plt.show()

    plt.bar(actions, PG.get_distribution(observations[1]))
    plt.xlabel("action at state[1,1]")
    # y-axis label
    plt.ylabel("probability")
    # chart title
    plt.title("policy distribution at state[1,1]")
    plt.show()
def train(self, max_episode=10, max_path_length=200, verbose=0):
    """Run ``max_episode`` episodes on ``self.env`` with a policy-gradient agent.

    For every episode the environment is reset, a new ``PolicyGradient`` is
    built, and actions are taken until the environment reports game over.
    Per-step lines are appended to ``episode_<e>.csv`` when ``verbose > 0``
    and a one-line episode summary is appended to ``self.history_filename``
    when that attribute is not None.

    Args:
        max_episode: number of episodes to run.
        max_path_length: accepted for interface compatibility; not used.
        verbose: 0 is quiet, >0 prints and logs LONG/SHORT steps plus
            per-step diagnostics after learning, >1 also prints the one-hot
            targets.
    """

    def append_line(path, text):
        # Replacement for `os.system("echo %s >> %s")`.  The unquoted shell
        # expansion collapsed every run of whitespace into one space, so the
        # same normalisation is applied here to keep the file format stable
        # without handing the text to a shell.
        with open(path, "a") as handle:
            handle.write(" ".join(text.split()) + "\n")

    env = self.env
    avg_reward_sum = 0.

    for e in range(max_episode):
        # NOTE(review): the environment is reset twice and only the second
        # observation is kept - kept as in the original; confirm it is needed.
        env._reset()
        observation = env._reset()
        game_over = False
        reward_sum = 0

        inputs = []
        outputs = []
        predicteds = []
        rewards = []

        # Start every episode with a fresh per-episode log file.
        f_episode = "episode_{0}.csv".format(e)
        if os.path.lexists(f_episode):
            os.remove(f_episode)

        print(observation[0].shape, observation[1].shape)

        # NOTE(review): a new learner is created for every episode, so
        # nothing learned here is carried over by this method itself.
        RL = PolicyGradient(
            n_actions=self.env.action_space.n,
            # n_features=observation.shape[0],
            learning_rate=0.02,
            reward_decay=0.995,
            # output_graph=True,
        )

        while not game_over:
            action, aprob = RL.choose_action(observation)

            inputs.append(observation)
            predicteds.append(aprob)

            # One-hot encoding of the action that was taken.
            y = np.zeros([self.env.action_space.n])
            y[action] = 1.
            outputs.append(y)

            observation, reward, actual_reward, game_over, info = self.env._step(
                action)
            reward_sum += float(actual_reward)

            # The running cumulative reward is what gets recorded per step.
            rewards.append(float(reward_sum))
            # NOTE(review): the whole `rewards` list (not a scalar) and the
            # post-step observation are passed here - confirm against
            # PolicyGradient.store_transition.
            RL.store_transition(observation, action, rewards)

            # check memory for RNN model
            if len(inputs) > self.max_memory:
                del inputs[0]
                del outputs[0]
                del predicteds[0]
                del rewards[0]

            if verbose > 0:
                action_name = env.actions[action]
                if action_name == "LONG" or action_name == "SHORT":
                    color = bcolors.FAIL if action_name == "LONG" else bcolors.OKBLUE
                    probs = "\t".join([
                        "%s:%.2f" % (l, i)
                        for l, i in zip(env.actions, aprob.tolist())
                    ])
                    print("%s:\t%s\t%.2f\t%.2f\t" %
                          (info["dt"], color + action_name + bcolors.ENDC,
                           reward_sum, info["cum"]) + probs)
                    # Same line without colour codes goes to the episode log.
                    append_line(
                        f_episode,
                        "%s:\t%s\t%.2f\t%.2f\t" %
                        (info["dt"], action_name, reward_sum, info["cum"]) +
                        probs)

        avg_reward_sum = avg_reward_sum * 0.99 + reward_sum * 0.01
        toPrint = "%d\t%s\t%s\t%.2f\t%.2f" % (
            e, info["code"],
            (bcolors.FAIL if reward_sum >= 0 else bcolors.OKBLUE) +
            ("%.2f" % reward_sum) + bcolors.ENDC, info["cum"],
            avg_reward_sum)
        print(toPrint)
        if self.history_filename is not None:
            append_line(self.history_filename, toPrint)

        discounted_rewards_ = RL.learn()  # train

        # Regroup the stored observations by input block.
        dim = len(inputs[0])
        inputs_ = [[] for i in range(dim)]
        for obs in inputs:
            for i, block in enumerate(obs):
                inputs_[i].append(block[0])
        inputs_ = [np.array(inputs_[i]) for i in range(dim)]

        outputs_ = np.vstack(outputs)
        predicteds_ = np.vstack(predicteds)
        rewards_ = np.vstack(rewards)

        print("shape: ", np.shape(rewards), np.shape(discounted_rewards_))

        for i, r in enumerate(zip(rewards, discounted_rewards_)):
            reward, discounted_reward = r

            if verbose > 1:
                print(outputs_[i], )

            if verbose > 0:
                print(predicteds_[i], outputs_[i], reward, discounted_reward)

        print("fit model input.shape %s, output.shape %s" %
              ([inputs_[i].shape
                for i in range(len(inputs_))], outputs_.shape))

        np.set_printoptions(linewidth=200, suppress=True)
        print("currentTargetIndex:", env.currentTargetIndex)
class runPG():
    """ROS node that trains a ``PolicyGradient`` agent on a two-finger gripper.

    Constructing the class registers the ROS node, subscriber and services
    and then runs the episode loop until ROS shuts down, so ``runPG()`` does
    not return during normal operation.
    """

    n_inputs = 4
    n_outputs = 4  # right and left for each finger
    # n_outputs = 8 # right, left and stop for each finger

    net = 0
    X = 0
    # Row `action` is the command passed to the MoveGripper service.
    A = np.array([[-1, -1], [-1, 1], [1, -1], [1, 1], [0, -1], [0, 1], [-1, 0],
                  [1, 0]])

    mode = 5
    reward_mode = 2

    R = []
    gripper_closed = False
    stLearning = True
    possible_plot = False

    # Exponentially smoothed episode reward; None until the first episode ends.
    _running_reward = None

    def __init__(self):
        rospy.init_node('runPG', anonymous=True)

        # Per-instance history, so instances do not share the class-level list.
        self.R = []
        self._running_reward = None

        if self.mode == 5:
            self.n_inputs = 4
        if self.mode == 8:
            self.n_inputs = 8

        self.RL = PolicyGradient(
            n_actions=self.n_outputs,
            n_features=self.n_inputs,
            learning_rate=0.02,
            reward_decay=0.99,
            load_saved_net=False,
            # output_graph=True,
        )

        rospy.Subscriber('/RL/gripper_status', String,
                         self.callbackGripperStatus)
        rospy.Service('/RL/net', net_eval, self.EvalNet)
        rospy.Service('/RL/start_learning', Empty, self.start_learning)
        obs_srv = rospy.ServiceProxy('/RL/observation', observation)
        drop_srv = rospy.ServiceProxy('/RL/IsObjDropped', IsDropped)
        move_srv = rospy.ServiceProxy('/RL/MoveGripper', TargetAngles)
        open_srv = rospy.ServiceProxy('/RL/OpenGripper', Empty)
        close_srv = rospy.ServiceProxy('/RL/CloseGripper', Empty)

        rospy.sleep(3)
        open_srv()

        episode_count = 0
        rate = rospy.Rate(15)  # 15hz
        while not rospy.is_shutdown():

            if self.stLearning:
                ## Start episode ##
                episode_count += 1

                # Close gripper
                raw_input(
                    "Place object between fingers and press Enter to close gripper..."
                )
                close_srv()
                while not self.gripper_closed:
                    rate.sleep()

                raw_input("Remove table and press Enter to start episode...")

                # Get observation
                obs = np.array(obs_srv().state)
                self.VT = []
                while True:
                    # Choose action
                    action = self.RL.choose_action(obs)

                    # Act
                    suc = move_srv(self.A[action]).success
                    rospy.sleep(0.05)
                    rate.sleep()

                    if suc:
                        # Get observation
                        obs_ = np.array(obs_srv().state)
                        # Check if dropped - end of episode
                        fail = drop_srv().dropped
                    else:
                        # End episode if overload or angle limits reached
                        rospy.logerr(
                            '[RL] Failed to move gripper. Episode declared failed.'
                        )
                        # No new observation is available; reuse the current
                        # one so `obs_` is defined even when the very first
                        # move of an episode fails.
                        obs_ = obs
                        fail = True

                    reward, done = self.transition_reward(obs_, fail)

                    self.RL.store_transition(obs, action, reward)

                    obs = obs_

                    if done:
                        ep_rs_sum = sum(self.RL.ep_rs)
                        running_reward = self._update_running_reward(ep_rs_sum)
                        print("*** episode: " + str(episode_count) +
                              ", episode reward: " + str(ep_rs_sum) +
                              ", running reward: " + str(int(running_reward)) +
                              " ***")

                        self.RL.learn()

                        self.R.append(running_reward)
                        self.possible_plot = True
                        break

                    rate.sleep()

            elif self.possible_plot:
                # Learning was switched off after at least one episode:
                # show the reward curve once.
                self.plot_sav()
                self.possible_plot = False

            # Open gripper
            if self.gripper_closed:
                open_srv()
                rospy.sleep(0.2)

            rate.sleep()

    def _update_running_reward(self, ep_rs_sum):
        """Fold one episode reward into the running average and return it.

        The first episode initialises the average; later episodes are mixed
        in with weight 0.01.  State lives on the instance because a
        ``globals()`` lookup never sees a method-local variable, which made
        the smoothing branch unreachable.
        """
        if self._running_reward is None:
            self._running_reward = ep_rs_sum
        else:
            self._running_reward = self._running_reward * 0.99 + ep_rs_sum * 0.01
        return self._running_reward

    def plot_sav(self):
        """Plot the recorded running rewards (``self.R``) against their index."""
        plt.plot(range(len(self.R)), self.R)
        plt.xlabel('episode steps')
        plt.ylabel('normalized state-action value')
        plt.show()

    def EvalNet(self, msg):
        """Service handler for '/RL/net'; always answers with action 0."""
        a = 0
        return {'action': a}

    def callbackGripperStatus(self, msg):
        """Record whether the gripper status message says "closed"."""
        self.gripper_closed = msg.data == "closed"

    def start_learning(self, msg):
        """Service handler that toggles the learning flag."""
        self.stLearning = not self.stLearning
        return EmptyResponse()

    def transition_reward(self, obs, fail):
        """Return ``(reward, done)`` for one transition.

        Args:
            obs: observation after the action; index 0 is compared against
                the goal threshold in reward mode 2.
            fail: True when the move failed or the object was dropped.

        Raises:
            ValueError: if ``self.reward_mode`` is neither 1 nor 2.
        """
        if self.reward_mode == 1:
            # Keep moving as much as possible
            reward = 0. if fail else 1.
            done = fail
        elif self.reward_mode == 2:
            # Get to a certain coordinate
            reward = -3. if fail else -1.
            done = fail
            if obs[0] > 135.:
                # Blocks until the operator acknowledges the goal.
                raw_input('Reached goal, x = %f.' % obs[0])
                reward = 5.
                done = True
        else:
            raise ValueError('Unsupported reward_mode: %r' % (self.reward_mode,))

        return reward, done
EPISODES = 500  # number of episodes (trajectories) to collect
MAX_STEP = 1500  # maximum number of steps per episode
rewards = []  # return of every episode

if __name__ == "__main__":
    PG = PolicyGradient(n_input=env.observation_space.shape[0],
                        n_output=env.action_space.n)

    for episode in range(EPISODES):
        s = env.reset()
        for i in range(MAX_STEP):
            if RENDER_FLAG:
                env.render()

            # Interact with the environment.
            action = PG.choose_action(s)
            s_, reward, done, _ = env.step(action)
            PG.store_transition(s, action, reward)

            hit_step_cap = i == (MAX_STEP - 1)
            if done or hit_step_cap:
                # Sum the rewards before learn(), which may clear the buffer.
                ep_rewards_sum = np.sum(PG.ep_rewards)
                if done:
                    # The environment ended the episode (pole fell or left
                    # the screen): render only after a high-return episode.
                    RENDER_FLAG = bool(ep_rewards_sum > 1000)
                else:
                    # The step cap was reached without termination.
                    RENDER_FLAG = True
                # Record the episode return in both cases (the step-cap
                # branch used to append the step index instead).
                rewards.append(ep_rewards_sum)
                PG.learn()
                break

            # Advance to the next state; without this every action was
            # chosen from the initial observation.
            s = s_
a_dim = env.action_space.n, learning_rate = 0.02, reward_decay = 0.99, #output_graph = True ) for i_epsiode in range(3000): s = env.reset() while True: if RENDER: env.render() a = RL.choose_action(s) s_,r,done,info = env.step(a) RL.store_transition(s,a,r) if done: ep_rs_sum = sum(RL.ep_rs) if 'running_reward' not in globals(): running_reward = ep_rs_sum else: running_reward = running_reward * 0.99 + ep_rs_sum * 0.01 if running_reward > DISPLAY_REWARD_THRESHOLD: RENDER = True print('episode:',i_epsiode,"reward:",int(running_reward)) vt = RL.learn() if i_epsiode == 0:
print("\nPIZZA CONFIG: ", pizza_config) print("\nSTATE: ", state) print("\n\nSTATE[0]", state[0]) #state[0] #get only first value of tuple for step in range(STEPS): if RENDER_ENV: env.render() # sample one action with the given probability distribution # 1. Choose an action based on observation action = PG.choose_action(state) # 2. Take action in the environment state_, reward, done, info = env.step(ACTIONS[action]) # 3. Store transition for training PG.store_transition(preprocess(state), action, reward) # Save new state #state = state_ if done: episode_rewards_sum = sum(PG.episode_rewards) rewards.append(episode_rewards_sum) max_reward_so_far = np.amax(rewards) print("==========================================") print("p_game: ", p_game) print("batch: ", batch) print("Reward: ", episode_rewards_sum) print("Max reward so far: ", max_reward_so_far) # 4. Train neural network
class runPG():
    """ROS node that trains a ``PolicyGradient`` agent to steer a gripper to a goal.

    Constructing the class registers the ROS node, subscriber, services and
    goal publisher and then runs the episode loop until ROS shuts down or
    more than ``max_episodes`` episodes have been run.
    """

    n_inputs = 4
    # n_outputs = 4 # right and left for each finger
    n_outputs = 8  # right, left and stop for each finger
    max_episodes = 1200
    max_steps = 2500

    net = 0
    X = 0
    # Row `action` is the command passed to the MoveGripper service.
    A = np.array([[-1, -1], [1, -1], [-1, 1], [1, 1], [0, -1], [0, 1], [-1, 0],
                  [1, 0]])

    mode = 5
    reward_mode = 3

    R = []
    g = np.array([-35.0, 104.0], dtype='f')  # Goal

    gripper_closed = False
    stLearning = True  # Enable learning
    possible_plot = False

    # For reward mode 3
    prev_dis2goal = 1e9

    # Exponentially smoothed episode reward; None until the first episode ends.
    _running_reward = None

    def __init__(self):
        rospy.init_node('runPG', anonymous=True)

        # Per-instance history, so instances do not share the class-level list.
        self.R = []
        self._running_reward = None

        if self.mode == 5:
            self.n_inputs = 4
        if self.mode == 8:
            self.n_inputs = 8

        self.RL = PolicyGradient(
            n_actions = self.n_outputs,
            n_features = self.n_inputs,
            learning_rate=0.001,
            reward_decay=0.98,
            load_saved_net=True,
            # output_graph=True,
        )

        rospy.Subscriber('/RL/gripper_status', String,
                         self.callbackGripperStatus)
        rospy.Service('/RL/net', net_eval, self.EvalNet)
        rospy.Service('/RL/start_learning', Empty, self.start_learning)
        obs_srv = rospy.ServiceProxy('/RL/observation', observation)
        drop_srv = rospy.ServiceProxy('/RL/IsObjDropped', IsDropped)
        move_srv = rospy.ServiceProxy('/RL/MoveGripper', TargetAngles)
        reset_srv = rospy.ServiceProxy('/RL/ResetGripper', Empty)
        pub_goal = rospy.Publisher('/RL/Goal', Float32MultiArray, queue_size=10)

        gg = Float32MultiArray()
        gg.data = self.g

        episode_count = 0
        rate = rospy.Rate(100)  # 100hz
        while not rospy.is_shutdown():

            if self.stLearning:
                ## Start episode ##
                episode_count += 1
                self.prev_dis2goal = 1e9

                # Set gripper
                reset_srv()
                while not self.gripper_closed:
                    rate.sleep()

                # Get observation
                obs = np.array(obs_srv().state)
                self.VT = []
                step = 0
                while True:
                    step += 1
                    print('[RL] Step %d in episode %d, distance to goal: %f.' %
                          (step, episode_count, self.prev_dis2goal))
                    pub_goal.publish(gg)

                    # Choose action
                    action = self.RL.choose_action(obs)

                    # Act
                    suc = move_srv(self.A[action]).success
                    rospy.sleep(0.05)
                    rate.sleep()

                    if suc:
                        # Get observation
                        obs_ = np.array(obs_srv().state)
                        # Check if dropped - end of episode
                        fail = drop_srv().dropped
                    else:
                        # End episode if overload or angle limits reached
                        rospy.logerr('[RL] Failed to move gripper. Episode declared failed.')
                        # No new observation is available; reuse the current
                        # one so `obs_` is defined even when the very first
                        # move of an episode fails.
                        obs_ = obs
                        fail = True

                    reward, done = self.transition_reward(obs_, fail)

                    self.RL.store_transition(obs, action, reward)

                    obs = obs_

                    # Hard cap on episode length.
                    if step > self.max_steps:
                        done = True

                    if done:
                        ep_rs_sum = sum(self.RL.ep_rs)
                        running_reward = self._update_running_reward(ep_rs_sum)
                        print("*** episode: " + str(episode_count) +
                              ", episode reward: " + str(ep_rs_sum) +
                              ", running reward: " + str(int(running_reward)) +
                              " ***")

                        self.RL.learn()

                        self.R.append(running_reward)
                        self.possible_plot = True
                        break

                    rate.sleep()

            elif self.possible_plot:
                # Learning was switched off after at least one episode:
                # show the reward curve once.
                self.plot_sav()
                self.possible_plot = False

            if self.max_episodes < episode_count:
                self.plot_sav()
                break

            rate.sleep()

    def _update_running_reward(self, ep_rs_sum):
        """Fold one episode reward into the running average and return it.

        The first episode initialises the average; later episodes are mixed
        in with weight 0.01.  State lives on the instance because a
        ``globals()`` lookup never sees a method-local variable, which made
        the smoothing branch unreachable.
        """
        if self._running_reward is None:
            self._running_reward = ep_rs_sum
        else:
            self._running_reward = self._running_reward * 0.99 + ep_rs_sum * 0.01
        return self._running_reward

    def plot_sav(self):
        """Plot the recorded running rewards (``self.R``) against their index."""
        plt.plot(range(len(self.R)), self.R)
        plt.xlabel('episode steps')
        plt.ylabel('normalized state-action value')
        plt.show()

    def EvalNet(self, msg):
        """Service handler for '/RL/net'; always answers with action 0."""
        a = 0
        return {'action': a}

    def callbackGripperStatus(self, msg):
        """Record whether the gripper status message says "closed"."""
        self.gripper_closed = msg.data == "closed"

    def start_learning(self, msg):
        """Service handler that toggles the learning flag."""
        self.stLearning = not self.stLearning
        return EmptyResponse()

    def transition_reward(self, obs, fail):
        """Return ``(reward, done)`` for one transition.

        In reward mode 3 this also updates ``self.prev_dis2goal`` with the
        current distance between ``obs[:2]`` and the goal ``self.g``.

        Args:
            obs: observation after the action; ``obs[0]`` is used in mode 2
                and ``obs[:2]`` in mode 3.
            fail: True when the move failed or the object was dropped.

        Raises:
            ValueError: if ``self.reward_mode`` is not 1, 2 or 3.
        """
        if self.reward_mode == 1:
            # Keep moving as much as possible
            reward = 0. if fail else 1.
            done = fail
        elif self.reward_mode == 2:
            # Cross a line
            reward = -3. if fail else -1.
            done = fail
            if obs[0] > 40.:
                print('Reached goal, x = %f.' % obs[0])
                reward = 5.
                done = True
        elif self.reward_mode == 3:
            # Get to a certain coordinate: reward only steps that do not
            # increase the distance to the goal.
            d = np.linalg.norm(self.g - obs[:2])
            reward = 0. if (fail or d > self.prev_dis2goal) else 1.
            done = fail
            if d < 5:
                print('Reached goal, (x,y) = (%f,%f).' % (obs[0], obs[1]))
                reward = 50.
                done = True
            self.prev_dis2goal = d
        else:
            raise ValueError('Unsupported reward_mode: %r' % (self.reward_mode,))

        return reward, done