def gen_test_brevitas_ort_integration():
    test_ort_integration = Action(
        'Test Brevitas-ORT integration',
        EXCLUDE_LIST,
        MATRIX,
        ORT_INTEGRATION_STEP_LIST)
    test_ort_integration.gen_yaml(BASE_YML_TEMPLATE, ORT_INTEGRATION_YML)
def gen_examples_pytest_yml():
    pytest = Action(
        'Examples Pytest',
        EXCLUDE_LIST + PYTEST_EXAMPLE_EXCLUDE_LIST_EXTRA,
        combine_od_list([MATRIX, PYTEST_MATRIX_EXTRA]),
        EXAMPLES_PYTEST_STEP_LIST)
    pytest.gen_yaml(BASE_YML_TEMPLATE, EXAMPLES_PYTEST_YML)
def monte_carlo_control(self, iters):
    """ Monte-Carlo control algorithm """
    num_wins = 0
    optimal_policy = np.zeros((self.env.dealer_values, self.env.player_values))
    for episode in range(0, iters):
        state_episode = self.env.get_initial_state()
        reward_episode = 0
        history = []
        # sample an episode
        while not state_episode.terminal:
            action = self.epsilon_greedy(state_episode)
            history.append([state_episode, action, reward_episode])
            # update number of visits
            self.N[state_episode.dealer_card - 1, state_episode.player_sum - 1, Action.get_value(action)] += 1
            [reward_episode, state_episode] = self.env.step(state_episode, action)
        # update Q towards the episode return Gt
        for state, action, reward in history:
            step_size = 1.0 / self.N[state.dealer_card - 1, state.player_sum - 1, Action.get_value(action)]
            Gt = reward_episode
            error = Gt - self.Q[state.dealer_card - 1, state.player_sum - 1, Action.get_value(action)]
            self.Q[state.dealer_card - 1, state.player_sum - 1, Action.get_value(action)] += step_size * error
        if Gt == 1:
            num_wins = num_wins + 1
    print("Percentage of wins: %.3f" % (num_wins / iters * 100.0))
    # update policy based on action-value function
    for (dealer_sum, player_sum), value in np.ndenumerate(self.V):
        if self.Q[dealer_sum, player_sum, 1] > self.Q[dealer_sum, player_sum, 0]:
            optimal_policy[dealer_sum, player_sum] = 1
        self.V[dealer_sum, player_sum] = max(self.Q[dealer_sum, player_sum, :])
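# Usage sketch (hedged): this mirrors how linear_sarsa/td_learning below bootstrap their
# Monte-Carlo baseline with this project's Environment/Agent classes:
#
#     env = Environment()
#     agent = Agent(env)
#     agent.monte_carlo_control(1000000)   # fills agent.Q / agent.V and prints the win rate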
def gen_pytest_yml():
    pytest = Action(
        'Pytest',
        EXCLUDE_LIST,
        combine_od_list([MATRIX, PYTEST_MATRIX_EXTRA]),
        PYTEST_STEP_LIST)
    pytest.gen_yaml(BASE_YML_TEMPLATE, PYTEST_YML)
def decode_action(self, a_m, a_t, state, mode):
    if mode == "max_probability":
        a_m = np.argmax(a_m)
        a_t = np.argmax(a_t)
    elif mode == "sample":
        # a_m += 0.01
        a_m /= a_m.sum()
        a_m = np.random.choice(range(3), p=a_m)
        # a_t += 0.01
        a_t /= a_t.sum()
        a_t = np.random.choice(range(3), p=a_t)
    action = Action()
    if a_m == 0:    # left
        action.v_n = -1.0
    elif a_m == 1:  # ahead
        action.v_t = +1.0
    elif a_m == 2:  # right
        action.v_n = +1.0
    if a_t == 0:    # left
        action.angular = +1.0
    elif a_t == 1:  # stay
        action.angular = 0.0
    elif a_t == 2:  # right
        action.angular = -1.0
    if state.detect:
        action.shoot = +1.0
    else:
        action.shoot = 0.0
    return action
def __init__(self):
    Gtk.Window.__init__(self, title="SmartTV OpenSource")
    self.set_default_size(get_monitors()[0].width, get_monitors()[0].height)
    left_bar_actions = [
        Action("Back", "go-previous", lambda: Gtk.main_quit()),
        Action("General", "preferences-desktop", lambda: switchStack(0, self.settings_view)),
        Action("Look", "preferences-desktop-theme", lambda: switchStack(1, self.settings_view)),
        Action("Torrent", "torrent", lambda: switchStack(2, self.settings_view)),
        Action("Other", "preferences-other", lambda: switchStack(3, self.settings_view))
    ]
    self.main_divider = Gtk.Box(spacing=6)
    self.add(self.main_divider)
    self.leftbar = LeftBar(actions=left_bar_actions, left_bar_width=LEFT_BAR_WIDTH, default_select=1)
    self.main_divider.pack_start(self.leftbar, False, True, 0)
    self.settings_view = MainStack([
        generalSettings(),
        Gtk.Label(label="Look"),
        Gtk.Label(label="Torrent"),
        Gtk.Label(label="Other")
    ])
    self.main_divider.pack_end(self.settings_view, True, True, 0)
def gen_test_brevitas_finn_integration():
    test_finn_integration = Action(
        'Test Brevitas-FINN integration',
        EXCLUDE_LIST + FINN_INTEGRATION_EXCLUDE_LIST_EXTRA,
        MATRIX,
        FINN_INTEGRATION_STEP_LIST)
    test_finn_integration.gen_yaml(BASE_YML_TEMPLATE, FINN_INTEGRATION_YML)
def gen_test_brevitas_pyxir_integration():
    test_pyxir_integration = Action(
        'Test Brevitas-PyXIR integration',
        EXCLUDE_LIST + PYXIR_INTEGRATION_EXCLUDE_LIST_EXTRA,
        MATRIX,
        PYXIR_INTEGRATION_STEP_LIST)
    test_pyxir_integration.gen_yaml(BASE_YML_TEMPLATE, PYXIR_INTEGRATION_YML)
def gen_test_develop_install_yml():
    test_develop_install = Action(
        'Test develop install',
        EXCLUDE_LIST,
        MATRIX,
        TEST_INSTALL_DEV_STEP_LIST)
    test_develop_install.gen_yaml(BASE_YML_TEMPLATE, DEVELOP_INSTALL_YML)
def move(self, board) -> Tuple[int, int, Action]:
    try:
        row, col, action = input(PROMPT).split()
        return int(row), int(col), Action(int(action))
    except Exception as ex:
        print(colored('[ERROR]', 'red'), ex)
        return self.move(board)
def parse_tree(dep_tree, robot: RobotSubject):
    '''
    Parse a single dependency tree; this may result in several commands.

    The parsing consists of four steps, which separately determine:
    1. actions of the robot
    2. relations of the actions
    3. items involved in each action
    4. requirements of the items
    '''
    print("{} will do the following things:".format(robot.name))
    # Find the actions to be carried out by this robot.
    # Example: (('send', 'VB'), 'nsubj', ('Alice', 'NNP')) -- the word "send" has the nominal subject "Alice".
    for ((governor, gov_pos), relation, (dependent, _)) in dep_tree:
        if relation == "nsubj":
            if dependent == robot.name and gov_pos == "VB":
                robot.action_list.append(Action(governor))
    # Find whether the actions are combined by "and" or "or" ("and" by default).
    robot_actions = [action.name for action in robot.action_list]
    for ((governor, gov_pos), relation, (dependent, _)) in dep_tree:
        if relation == "conj:or":
            if governor in robot_actions and dependent in robot_actions:
                robot.do_all = False
    # Find everything related to the verbs.
    cmd_list = []
    for action in robot.action_list:
        action.parse_dep(dep_tree)
        cmd_list.extend(action.gen_command(print_result=True))
    return cmd_list
def register_step(self, observation: Observation, action: Action, reward: Reward,
                  next_observation: Observation, done: bool, action_metadata: dict) -> 'Step':
    self.trajectory_returns += reward
    if 'log_pi' in action_metadata.keys():
        self.trajectory_entropy += action_metadata['log_pi'].reshape(1, -1)
    action_metadata.update(
        dict(returns=self.trajectory_returns, entropy=self.trajectory_entropy))
    step = Step(state=observation.reshape(1, -1),
                action=action.reshape(1, -1),
                reward=np.array(reward, dtype=np.float32).reshape(1, -1),
                next_state=next_observation.reshape(1, -1),
                termination_masks=np.array(done, dtype='uint8').reshape(1, -1),
                metadata=action_metadata)
    if self.step_count == 0:
        self.initialize_records(step)
    else:
        for stat, value in step.asdict().items():
            self.trajectory_buffer[stat].append(value)
    self.step_history.append(step)
    self.step_count += 1
    return step
def distributions():
    conditions.clear()
    dice1 = 8
    dice2 = 6
    reroll_equal_to = []
    reroll_lowest = 0
    roll_min = 0
    drop_lowest = 0
    details = [dice1, dice2, reroll_equal_to, reroll_lowest, roll_min, drop_lowest]
    d20 = d20Set(1)
    diceset = Diceset([(dice1, dice2)])
    action = Action(d20, 0, diceset, crit_numbers=[], fail_dmg_scale=0.5)
    stats = Statistics(action)
    stats.collect_statistics()
    dummystats.clear()
    dummystats.append(stats.report_statistics())
    return render_template('distributions.html',
                           imagepath='static/img/placeholder.png',
                           stats=dummystats,
                           conditions=conditions,
                           collecting=False)
def epsilon_greedy(self, state):
    """ Epsilon-greedy exploration """
    if state.terminal:
        min_num_action = 0
    else:
        min_num_action = min(self.N[state.dealer_card - 1, state.player_sum - 1, :])
    eps = self.N0 / (self.N0 + min_num_action)
    # print(eps)
    if random() < eps:
        return Action.getRandomAction()
    else:
        action_value = np.argmax(self.Q[state.dealer_card - 1, state.player_sum - 1, :])
        return Action.get_action(action_value)
def __init__(self):
    self.main_view = MainStack([
        Trending(),
        Gtk.Label(label="Apps"),
        Movies(),
        Gtk.Label(label="Songs"),
        Gtk.Label(label="Files"),
    ])
    left_bar_actions = [
        Action("Trending", "go-home", lambda: switchStack(0, self.main_view)),
        Action("Apps", "view-grid", lambda: switchStack(1, self.main_view)),
        Action("Movies", "media-tape", lambda: switchStack(2, self.main_view)),
        Action("Songs", "media-optical-cd-audio", lambda: switchStack(3, self.main_view)),
        Action("Files", "folder", lambda: switchStack(4, self.main_view)),
        # Action("Settings", "open-menu",
        #        lambda: system("python3 " + path.abspath("settings.py") + " " + argv[1])),
    ]
    Gtk.Window.__init__(self, title="SmartTV OpenSource")
    self.set_default_size(get_monitors()[0].width, get_monitors()[0].height)
    self.main_divider = Gtk.Box(spacing=6)
    self.add(self.main_divider)
    self.leftbar = LeftBar(actions=left_bar_actions, left_bar_width=LEFT_BAR_WIDTH)
    self.sidebar_divider = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=6)
    self.sidebar_divider.pack_start(WeatherBox(), False, False, 0)
    self.sidebar_divider.pack_start(self.leftbar, True, True, 0)
    settings_list = Gtk.ListBox()
    settings_list.set_selection_mode(Gtk.SelectionMode.NONE)
    settings_list.insert(ListTile("Settings", "open-menu"), 0)
    settings_list.connect(
        "row-activated",
        lambda x, y: system("python3 " + path.abspath("settings.py") + " " + argv[1]))
    self.sidebar_divider.pack_end(settings_list, False, False, 0)
    self.main_divider.pack_start(self.sidebar_divider, False, False, 0)
    self.main_divider.pack_end(self.main_view, True, True, 0)
def store_search_statistics(self, root: Node):
    sum_visits = sum(child.visit_count for child in root.children.values())
    action_space = (Action(index) for index in range(self.action_space_size))
    self.child_visits.append([
        root.children[a].visit_count / sum_visits if a in root.children else 0
        for a in action_space
    ])
    self.root_values.append(root.value())
def linear_sarsa(self, iters, lambda_, compare_to_montecarlo=False):
    """ Linear function approximation of the sarsa(lambda) algorithm """
    if compare_to_montecarlo:
        monte_carlo_iterations = 1000000
        env = Environment()
        agent = Agent(env)
        agent.monte_carlo_control(monte_carlo_iterations)
        Q_monte_carlo = agent.Q
        mse_all = []
    for episode in range(0, iters):
        E = np.zeros(self.number_of_features)
        # initialize state and action
        state = self.env.get_initial_state()
        reward = 0
        action = self.epsilon_greedy_linear_constant(state)
        # self.N[state.dealer_card - 1, state.player_sum - 1, Action.get_value(action)] += 1
        while not state.terminal:
            # update number of visits
            self.N[state.dealer_card - 1, state.player_sum - 1, Action.get_value(action)] += 1
            [reward, state_forward] = self.env.step(state, action)
            action_forward = self.epsilon_greedy_linear_constant(state_forward)
            if not state_forward.terminal:
                current_estimate = reward + self.estimate_Q(state_forward, action_forward)
            else:
                current_estimate = reward
            previous_estimate = self.estimate_Q(state, action)
            delta = current_estimate - previous_estimate
            E = np.add(E, self.get_feature_vector(state, action))
            step_size = 0.01
            self.weights += step_size * delta * E
            E = lambda_ * E
            action = action_forward
            state = state_forward
        if compare_to_montecarlo:
            mse_all.append(compute_mse(self.approximation_to_Q(), Q_monte_carlo))
    if compare_to_montecarlo:
        # print(mse_all[-1])
        plt.plot(range(0, iters), mse_all, 'r-')
        plt.xlabel("episodes")
        plt.ylabel("MSE")
        # plt.title("lambda = 0")
        plt.show()
    for (dealer_sum, player_sum), value in np.ndenumerate(self.V):
        s = State(dealer_sum + 1, player_sum + 1)
        self.Q[dealer_sum, player_sum, 0] = np.dot(self.get_feature_vector(s, Action.hit), self.weights)
        self.Q[dealer_sum, player_sum, 1] = np.dot(self.get_feature_vector(s, Action.stick), self.weights)
        self.V[dealer_sum, player_sum] = max(self.estimate_Q(s, Action.hit), self.estimate_Q(s, Action.stick))
def home():
    dice1 = 8
    dice2 = 6
    d20 = d20Set(1)
    diceset = Diceset([(dice1, dice2)])
    action = Action(d20, 0, diceset, crit_numbers=[], fail_dmg_scale=0.5)
    stats = Statistics(action)
    stats.collect_statistics()
    return render_template('home.html')
    # , imagepath='static/img/placeholder.png', stats=stats.report_statistics())
def epsilon_greedy_linear_constant(self, state, eps_=0.1):
    """ Epsilon-greedy exploration with a constant exploration epsilon """
    eps = eps_
    if random() < eps or state.terminal:
        return Action.getRandomAction()
    else:
        actionHit_value = sum(self.get_feature_vector(state, Action.hit) * self.weights)
        actionStick_value = sum(self.get_feature_vector(state, Action.stick) * self.weights)
        action = Action.hit if actionHit_value > actionStick_value else Action.stick
        return action
def addAction(self, jsonStr):
    tempAction = json.loads(jsonStr)  # store the JSON in a Python dictionary
    try:
        action = Action(tempAction['action'], tempAction['time'])
    except Exception as ex:
        logging.exception('Caught an error')
        print("Couldn't create Action object")
        print("Added JSON object must have ONLY two keys, 'action' and 'time'")
        return
    # If the action already exists in the actions dictionary, update avgTime and numActions;
    # otherwise add it to the dictionary.
    if self.__doesExist(tempAction):
        print("updating old action")
        # Update the current object in the dictionary so numActions and avgTime stay correct.
        self.actions[action.getAction()].updateAction(action.getTime())
    else:
        print("inserting new action")
        self.actions.update({action.getAction(): action})
def next(self, action: Action = Action(), **kwargs):
    """
    Required method for getting the next state, possibly given an action.
    Should only update the attributes of the class.
    """
    # YOUR CODE HERE
    self.terminal = kwargs.get('terminal', False)
    prop = action.properties
    if prop == 'nudge':
        self.properties[1] += 1
    else:
        self.properties[0] += 1
    return self
def calculate():
    root = os.path.dirname(__file__)
    dir = os.path.join(root, 'static/img/temp/')
    files = os.listdir(dir)
    for file in files:
        os.remove(os.path.join(dir, file))
    alpha = 0.9 ** len(conditions)
    statistics = []
    i = 0
    for details in conditions:
        d20 = d20Set(1)
        diceset = Diceset([(details[0], details[1])], details[2], details[3], details[4], details[5])
        action = Action(d20, 0, diceset, crit_numbers=[], fail_dmg_scale=0.0)
        stats = Statistics(action)
        stats.collect_statistics()
        stats.plot_histogram(alpha, str(i))
        i += 1
        statistics.append(stats.report_statistics())
    plotname = 'static/img/temp/'
    for detail in details:
        plotname = plotname + str(detail)
    plotname = plotname + '.png'
    copyconditions = conditions.copy()
    conditions.clear()
    plt.savefig(os.path.join(root, plotname))
    plt.clf()
    return render_template('distributions.html',
                           imagepath=plotname,
                           stats=statistics,
                           conditions=copyconditions,
                           collecting=False)
def select_action(self, state: RobotState):
    action = Action()
    pos = state.pos
    vel = state.velocity
    angle = state.angle
    if state.detect:
        action.shoot = +1.0
    else:
        action.shoot = 0.0
    if (pos[0] - self.target[0]) ** 2 + (pos[1] - self.target[1]) ** 2 < 0.1:
        self.index = random.choice(self.connected[self.index])
        self.target = self.avaiable_pos[self.index]
        # print(self.target)
        # self.index = (self.index + 1) % len(self.path)
        # self.target = self.path[self.index]
    v, omega = self.move.moveTo(pos, vel, angle, self.target)
    action.v_t = v[0]
    action.v_n = v[1]
    action.omega = omega
    return action
def get_action_from_args(request_args: dict) -> Action:
    """Define how to map request arguments to an Action."""
    # YOUR CODE HERE
    return Action()
def __init__(self, label, context, etype, query, parent=None):
    Action.__init__(self, label, context)
    self.etype = etype
    self.query = query
def __init__(self, context, parent=None):
    Action.__init__(self, u'Lista', context)
def test_step_with_kinematic():
    env_config = configparser.RawConfigParser()
    env_config.read('configs/test_env.config')
    env_config.set('agent', 'kinematic', 'true')
    test_env = ENV(env_config, phase='test')
    test_env.reset()

    # test state computation
    states, rewards, done_signals = test_env.step((Action(1, 0), Action(1, 0)))
    assert np.allclose(states[0], JointState(-1, 0, 1, 0, 0.3, 2, 0, 1.0, 0, 1, 0, -1, 0, 0.3))
    assert np.allclose(states[1], JointState(1, 0, -1, 0, 0.3, -2, 0, 1.0, np.pi, -1, 0, 1, 0, 0.3))
    assert rewards == [0, 0]
    assert done_signals == [False, False]

    # test one-step lookahead
    reward, end_time = test_env.compute_reward(0, [Action(1.5, 0), None])
    assert reward == -0.25
    assert end_time == 1
    reward, end_time = test_env.compute_reward(0, [Action(1.5, 0), Action(1.5, 0)])
    assert reward == -0.25
    assert end_time == 0.5

    # test collision detection
    states, rewards, done_signals = test_env.step((Action(1, 0), Action(1, 0)))
    assert np.allclose(states[0], JointState(0, 0, 1, 0, 0.3, 2, 0, 1.0, 0, 0, 0, -1, 0, 0.3))
    assert np.allclose(states[1], JointState(0, 0, -1, 0, 0.3, -2, 0, 1.0, np.pi, 0, 0, 1, 0, 0.3))
    assert rewards == [-0.25, -0.25]
    assert done_signals == [2, 2]

    # test reaching goal
    test_env = ENV(env_config, phase='test')
    test_env.reset()
    test_env.step((Action(1, np.pi / 2), Action(2, np.pi / 2)))
    test_env.step((Action(4, -np.pi / 2), Action(4, -np.pi / 2)))
    states, rewards, done_signals = test_env.step((Action(1, -np.pi / 2), Action(2, -np.pi / 2)))
    assert rewards == [1, 1]
    assert done_signals == [1, 1]
def td_learning(self, iters, lambda_, compare_to_montecarlo=False, trace=Trace.accumulating):
    """ Sarsa(lambda) algorithm """
    if compare_to_montecarlo:
        monte_carlo_iterations = 1000000
        env = Environment()
        agent = Agent(env)
        agent.monte_carlo_control(monte_carlo_iterations)
        Q_monte_carlo = agent.Q
        mse_all = []
    for episode in range(0, iters):
        E = np.zeros((self.env.dealer_values, self.env.player_values, self.env.action_values))
        # initialize state and action
        state = self.env.get_initial_state()
        reward = 0
        action = self.epsilon_greedy(state)
        while not state.terminal:
            # update number of visits
            self.N[state.dealer_card - 1, state.player_sum - 1, Action.get_value(action)] += 1
            [reward, state_forward] = self.env.step(state, action)
            action_forward = self.epsilon_greedy(state_forward)
            if not state_forward.terminal:
                current_estimate = reward + self.Q[state_forward.dealer_card - 1, state_forward.player_sum - 1, Action.get_value(action_forward)]
            else:
                current_estimate = reward
            previous_estimate = self.Q[state.dealer_card - 1, state.player_sum - 1, Action.get_value(action)]
            delta = current_estimate - previous_estimate
            step_size = 1.0 / self.N[state.dealer_card - 1, state.player_sum - 1, Action.get_value(action)]
            if trace == Trace.accumulating:
                E[state.dealer_card - 1, state.player_sum - 1, Action.get_value(action)] += 1
            elif trace == Trace.replacing:
                E[state.dealer_card - 1, state.player_sum - 1, Action.get_value(action)] = 1
            elif trace == Trace.dutch:
                E[state.dealer_card - 1, state.player_sum - 1, Action.get_value(action)] += step_size * (1 - E[state.dealer_card - 1, state.player_sum - 1, Action.get_value(action)])
            if trace == Trace.dutch:
                self.Q += delta * E
            else:
                self.Q += step_size * delta * E
            E = lambda_ * E
            action = action_forward
            state = state_forward
        if compare_to_montecarlo:
            mse_all.append(compute_mse(self.Q, Q_monte_carlo))
    if compare_to_montecarlo:
        # print(mse_all[-1])
        plt.plot(range(0, iters), mse_all, 'r-')
        plt.xlabel("episodes")
        plt.ylabel("MSE")
        # plt.title("lambda = 1")
        plt.show()
    # update policy based on action-value function
    for (dealer_sum, player_sum), value in np.ndenumerate(self.V):
        self.V[dealer_sum, player_sum] = max(self.Q[dealer_sum, player_sum, :])
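# Usage sketch (hedged, based only on the signature above): run sarsa(lambda) with a chosen
# eligibility-trace variant and plot the MSE against the Monte-Carlo baseline:
#
#     agent = Agent(Environment())
#     agent.td_learning(10000, lambda_=0.5, compare_to_montecarlo=True, trace=Trace.replacing)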
if args.load_model:
    agent.load_model(args.load_model_path)
if args.enemy == "hand":
    agent2 = HandAgent()
elif args.enemy == "AC":
    agent2 = ActorCriticAgent()
    agent2.load_model(args.load_model_path)

env = ICRABattleField()
env.seed(args.seed)
losses = []
rewards = []
for i_episode in range(1, args.epoch + 1):
    print("Epoch: [{}/{}]".format(i_episode, args.epoch))
    # Initialize the environment and state
    action = Action()
    pos = env.reset()
    if args.enemy == "hand":
        agent2.reset(pos)
    state, reward, done, info = env.step(action)
    for t in range(2 * 60 * 30):
        # Other agent
        if args.enemy == "hand":
            env.set_robot_action(ID_B1, agent2.select_action(state[ID_B1]))
        elif args.enemy == "AC":
            env.set_robot_action(ID_B1, agent2.select_action(state[ID_B1], mode="max_probability"))
        # Select and perform an action
        state_map = agent.preprocess(state[ID_R1])
        a_m, a_t = agent.run_AC(state_map)
def gen_test_brevitas_xir_integration():
    test_xir_integration = Action(
        'Test Brevitas-XIR integration',
        [],
        MATRIX,
        XIR_INTEGRATION_STEP_LIST)
    test_xir_integration.gen_yaml(VITIS_AI_BASE_YML_TEMPLATE, XIR_INTEGRATION_YML)
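# Hedged sketch of a driver that wires the gen_* functions above together; the upstream
# script's real entry point may differ, so treat this only as an illustration of how the
# workflow yml files might be regenerated in one pass.
if __name__ == '__main__':
    gen_pytest_yml()
    gen_examples_pytest_yml()
    gen_test_develop_install_yml()
    gen_test_brevitas_finn_integration()
    gen_test_brevitas_ort_integration()
    gen_test_brevitas_pyxir_integration()
    gen_test_brevitas_xir_integration()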