def run_episode(config, parameters, actions, counters, ws, gw, mc, episode):
    """Run one episode of uniformly random actions until a flower is picked.

    Args:
        config: mapping read for 'walk_stride', 'look_angle' (both forwarded
            to ``actionutils.take_action``) and 'graph_server' (truthy means
            each step is reported over ``ws``).
        parameters: unused in this version; kept so the signature matches
            the other ``run_episode`` variants.
        actions: mapping of action name -> action object. Only ``.value`` of
            the chosen action is read here (for the websocket message).
        counters: mapping mutated in place: 'actioncount' is incremented on
            every step, 'flowercount' once when the episode ends;
            'roundcount' is only read.
        ws: object with a ``send(str)`` method; used only when
            ``config['graph_server']`` is truthy.
        gw: unused in this version.
        mc: handle forwarded to ``actionutils.take_action``.
        episode: episode identifier included in the websocket message.

    Returns:
        None. Results are reported through ``counters``, ``ws`` and the log.
    """
    logging.debug("starting new episode")

    # Explicit flag (rather than looping on the reward) keeps the exit
    # condition easy to follow.
    flower_picked = False
    # Number of actions taken in this episode only.
    ep_actioncount = 0

    # The episode continues until a flower is picked.
    while not flower_picked:
        # Select a random action. list() is required because
        # np.random.choice needs a 1-D sequence and, on Python 3,
        # dict.keys() is a view object that it rejects.
        action = np.random.choice(list(actions.keys()))
        logging.debug("ACTION: %s", action)

        # Non-zero reward means the flower was picked (original note:
        # reward is 10 if a flower was picked, else 0).
        reward = actionutils.take_action(
            action, config['walk_stride'], config['look_angle'], mc)

        # Global (cross-episode) action counter.
        counters['actioncount'] += 1

        # Allow the action animation to finish before continuing.
        time.sleep(0.8)

        # Send the q-value to the websocket API server only if enabled;
        # the payload is not built when nobody will receive it.
        if config['graph_server']:
            message = {
                'action': action,
                'q_value': actions[action].value,
                'step': counters['actioncount'],
                'episode': episode,
                'round': counters['roundcount'],
            }
            ws.send(json.dumps(message))

        # The episode is over once a flower was picked.
        if reward != 0:
            flower_picked = True
            counters['flowercount'] += 1

        ep_actioncount += 1
        logging.debug("actions taken this episode: %s", ep_actioncount)
def run_episode(config, parameters, actions, counters, ws, gw, mc, episode):
    """Run one learning episode until a flower is picked.

    Each step picks an action in this order of precedence:
      1. "innate" chop: if ``is_it_red`` reports true for the current state,
         'chop' is chosen with probability ``parameters['chop_prob']``;
      2. "explore": with probability ``parameters['epsilon']`` a uniformly
         random action name;
      3. "exploit": whatever ``select_optimal_action(actions)`` returns.
    The action is executed, the new state is observed, and the chosen
    action object is updated with the (previous, new) state pair and reward.

    Args:
        config: mapping read for 'walk_stride', 'look_angle' (both forwarded
            to ``actionutils.take_action``) and 'graph_server' (truthy means
            each step is reported over ``ws``).
        parameters: mapping read for 'chop_prob' and 'epsilon'.
        actions: mapping of action name -> action object exposing
            ``update(states, reward, actions)`` and ``.value``.
        counters: mapping mutated in place: 'actioncount' is incremented on
            every step, 'flowercount' once when the episode ends;
            'roundcount' is only read.
        ws: object with a ``send(str)`` method; used only when
            ``config['graph_server']`` is truthy.
        gw: handle forwarded to ``observe_state``.
        mc: handle forwarded to ``actionutils.take_action``.
        episode: episode identifier passed to ``update_action_datastream``
            and included in the websocket message.

    Returns:
        None. Results are reported through ``counters``, the action objects,
        ``ws`` and the log.
    """
    logging.debug("starting new episode")

    # Explicit flag (rather than looping on the reward) keeps the exit
    # condition easy to follow.
    flower_picked = False

    # states holds [current] between steps and [previous, new] during the
    # update; start with the initial observation as states[0].
    states = [observe_state(gw)]

    # Number of actions taken in this episode only.
    ep_actioncount = 0

    # The episode continues until a flower is picked.
    while not flower_picked:
        # Start every step with no action chosen.
        action = None
        action_strategy = ""

        # Innate behaviour: if enough red is in the frame, chop with
        # probability chop_prob. The random draw happens only when the
        # frame is red, exactly as in the nested-if form.
        if is_it_red(states[0]):
            if np.random.uniform(0, 1) < parameters['chop_prob']:
                action = 'chop'
                action_strategy = "innate"
                logging.debug("INNATE: chop")

        # If we still don't have an action, explore or exploit depending
        # on probability epsilon.
        if action is None:
            if np.random.uniform(0, 1) < parameters['epsilon']:
                # Explore. list() is required because np.random.choice
                # needs a 1-D sequence and, on Python 3, dict.keys() is a
                # view object that it rejects.
                action = np.random.choice(list(actions.keys()))
                action_strategy = "explore"
                logging.debug("EXPLORE: %s", action)
            else:
                # Exploit.
                action = select_optimal_action(actions)
                action_strategy = "exploit"
                logging.debug("EXPLOIT: %s", action)

        # Non-zero reward means the flower was picked (original note:
        # reward is 10 if a flower was picked, else 0).
        reward = actionutils.take_action(
            action, config['walk_stride'], config['look_angle'], mc)

        # Global (cross-episode) action counter.
        counters['actioncount'] += 1

        # Allow the action animation to finish before observing new state.
        time.sleep(0.8)

        # Observe the new state, states[1].
        states.append(observe_state(gw))

        # Per the original note, update() does the forward pass and
        # backprop for the action's weights.
        actions[action].update(states, reward, actions)

        update_action_datastream(
            action, actions, counters, action_strategy, episode)

        # Send the q-value to the websocket API server only if enabled;
        # the payload is not built when nobody will receive it.
        if config['graph_server']:
            message = {
                'action': action,
                'q_value': actions[action].value,
                'step': counters['actioncount'],
                'episode': episode,
                'round': counters['roundcount'],
            }
            ws.send(json.dumps(message))

        # The new state, states[1], becomes states[0].
        del states[0]

        # The episode is over once a flower was picked.
        if reward != 0:
            flower_picked = True
            counters['flowercount'] += 1

        ep_actioncount += 1
        logging.debug("actions taken this episode: %s", ep_actioncount)
def run_episode(config, parameters, actions, counters, ws, gw, mc, episode):
    """Run one learning episode until a flower is picked.

    Action selection per step, in order of precedence:
      1. "innate" chop: when ``is_it_red`` is true for the current state,
         'chop' is chosen with probability ``parameters['chop_prob']``;
      2. "explore": with probability ``parameters['epsilon']`` a uniformly
         random action name;
      3. "exploit": the result of ``select_optimal_action(actions)``.
    After executing the action the new state is observed and the chosen
    action object is updated with the (previous, new) state pair and reward.

    Args:
        config: mapping read for 'walk_stride', 'look_angle' (both forwarded
            to ``actionutils.take_action``) and 'graph_server' (truthy means
            each step is reported over ``ws``).
        parameters: mapping read for 'chop_prob' and 'epsilon'.
        actions: mapping of action name -> action object exposing
            ``update(states, reward, actions)`` and ``.value``.
        counters: mapping mutated in place: 'actioncount' is incremented on
            every step, 'flowercount' once when the episode ends;
            'roundcount' is only read.
        ws: object with a ``send(str)`` method; used only when
            ``config['graph_server']`` is truthy.
        gw: handle forwarded to ``observe_state``.
        mc: handle forwarded to ``actionutils.take_action``.
        episode: episode identifier passed to ``update_action_datastream``
            and included in the websocket message.

    Returns:
        None. Results are reported through ``counters``, the action objects,
        ``ws`` and the log.
    """
    logging.debug("starting new episode")

    # Explicit flag (rather than looping on the reward) keeps the exit
    # condition easy to follow.
    flower_picked = False

    # Observe the initial state; it becomes states[0]. During the update
    # step the list temporarily holds [previous, new].
    states = [observe_state(gw)]

    # Number of actions taken in this episode only.
    ep_actioncount = 0

    # The episode continues until a flower is picked.
    while not flower_picked:
        # Reset the action at the start of every step.
        action = None
        action_strategy = ""

        # Check if enough red is in the frame for the innate chop; the
        # random draw is made only when the frame is red.
        if is_it_red(states[0]):
            if np.random.uniform(0, 1) < parameters['chop_prob']:
                action = 'chop'
                action_strategy = "innate"
                logging.debug("INNATE: chop")

        # No innate action chosen: explore/exploit by probability epsilon.
        if action is None:
            if np.random.uniform(0, 1) < parameters['epsilon']:
                # Explore. The keys are materialised with list() because
                # np.random.choice needs a 1-D sequence; a Python 3
                # dict.keys() view is rejected.
                action = np.random.choice(list(actions.keys()))
                action_strategy = "explore"
                logging.debug("EXPLORE: %s", action)
            else:
                # Exploit.
                action = select_optimal_action(actions)
                action_strategy = "exploit"
                logging.debug("EXPLOIT: %s", action)

        # Non-zero reward means the flower was picked (original note:
        # reward is 10 if a flower was picked, else 0).
        reward = actionutils.take_action(
            action, config['walk_stride'], config['look_angle'], mc)

        # Global (cross-episode) action counter.
        counters['actioncount'] += 1

        # Allow the action animation to finish before observing new state.
        time.sleep(0.8)

        # Observe the new state, states[1].
        states.append(observe_state(gw))

        # Per the original note, update() does the forward pass and
        # backprop for the action's weights.
        actions[action].update(states, reward, actions)

        update_action_datastream(
            action, actions, counters, action_strategy, episode)

        # Send the q-value to the websocket API server only if enabled;
        # the payload is not built when nobody will receive it.
        if config['graph_server']:
            message = {
                'action': action,
                'q_value': actions[action].value,
                'step': counters['actioncount'],
                'episode': episode,
                'round': counters['roundcount'],
            }
            ws.send(json.dumps(message))

        # The new state, states[1], becomes states[0].
        del states[0]

        # The episode is over once a flower was picked.
        if reward != 0:
            flower_picked = True
            counters['flowercount'] += 1

        ep_actioncount += 1
        logging.debug("actions taken this episode: %s", ep_actioncount)