Example no. 1
def gen_test_brevitas_ort_integration():
    test_ort_integration = Action(
        'Test Brevitas-ORT integration',
        EXCLUDE_LIST,
        MATRIX,
        ORT_INTEGRATION_STEP_LIST)
    test_ort_integration.gen_yaml(BASE_YML_TEMPLATE, ORT_INTEGRATION_YML)
Example no. 2
def gen_examples_pytest_yml():
    pytest = Action(
        'Examples Pytest',
        EXCLUDE_LIST + PYTEST_EXAMPLE_EXCLUDE_LIST_EXTRA,
        combine_od_list([MATRIX, PYTEST_MATRIX_EXTRA]),
        EXAMPLES_PYTEST_STEP_LIST)
    pytest.gen_yaml(BASE_YML_TEMPLATE, EXAMPLES_PYTEST_YML)
Example no. 3
 def monte_carlo_control(self, iters):
     """ 
     Monte-Carlo control algorithm
     """
     num_wins = 0
     optimal_policy = np.zeros((self.env.dealer_values, self.env.player_values))
     for episode in range(0, iters):
         state_episode = self.env.get_initial_state()
         reward_episode = 0
         history = []
         #sample episode
         while not state_episode.terminal:
             action = self.epsilon_greedy(state_episode)
             
             history.append([state_episode, action, reward_episode])
             #update number of visits
             self.N[state_episode.dealer_card - 1, state_episode.player_sum - 1, Action.get_value(action)] += 1
             
             [reward_episode, state_episode] = self.env.step(state_episode, action)
         
         #update Q
         for state, action, reward in history:
             step_size = 1.0 / self.N[state.dealer_card - 1, state.player_sum - 1, Action.get_value(action)]
             Gt = reward_episode
             error = Gt - self.Q[state.dealer_card - 1, state.player_sum - 1, Action.get_value(action)]
             self.Q[state.dealer_card - 1, state.player_sum - 1, Action.get_value(action)] += step_size * error
         if reward_episode == 1:
             num_wins += 1
         
     print("Percentage of wins: %.3f" % (num_wins / iters * 100.0))
     #update policy based on action-value function
     for (dealer_sum, player_sum), value in np.ndenumerate(self.V):
         if self.Q[dealer_sum, player_sum, 1] > self.Q[dealer_sum, player_sum, 0]:
             optimal_policy[dealer_sum, player_sum] = 1
         self.V[dealer_sum, player_sum] = max(self.Q[dealer_sum, player_sum, :])
     return optimal_policy
Example no. 4
def gen_pytest_yml():
    pytest = Action(
        'Pytest',
        EXCLUDE_LIST,
        combine_od_list([MATRIX, PYTEST_MATRIX_EXTRA]),
        PYTEST_STEP_LIST)
    pytest.gen_yaml(BASE_YML_TEMPLATE, PYTEST_YML)
Example no. 5
    def decode_action(self, a_m, a_t, state, mode):
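        # Decode the movement head a_m (left / ahead / right) and the turning head
        # a_t (left / stay / right), either greedily via argmax or by sampling from
        # the normalized probabilities; shooting is enabled whenever an enemy is detected.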
        if mode == "max_probability":
            a_m = np.argmax(a_m)
            a_t = np.argmax(a_t)
        elif mode == "sample":
            #a_m += 0.01
            a_m /= a_m.sum()
            a_m = np.random.choice(range(3), p=a_m)
            #a_t += 0.01
            a_t /= a_t.sum()
            a_t = np.random.choice(range(3), p=a_t)

        action = Action()
        if a_m == 0:  # left
            action.v_n = -1.0
        elif a_m == 1:  # ahead
            action.v_t = +1.0
        elif a_m == 2:  # right
            action.v_n = +1.0

        if a_t == 0:  # left
            action.angular = +1.0
        elif a_t == 1:  # stay
            action.angular = 0.0
        elif a_t == 2:  # right
            action.angular = -1.0

        if state.detect:
            action.shoot = +1.0
        else:
            action.shoot = 0.0

        return action
Example no. 6
    def __init__(self):
        Gtk.Window.__init__(self, title="SmartTV OpenSource")
        self.set_default_size(get_monitors()[0].width,
                              get_monitors()[0].height)
        left_bar_actions = [
            Action("Back", "go-previous", lambda: Gtk.main_quit()),
            Action("General", "preferences-desktop",
                   lambda: switchStack(0, self.settings_view)),
            Action("Look", "preferences-desktop-theme",
                   lambda: switchStack(1, self.settings_view)),
            Action("Torrent", "torrent",
                   lambda: switchStack(2, self.settings_view)),
            Action("Other", "preferences-other",
                   lambda: switchStack(3, self.settings_view))
        ]

        self.main_divider = Gtk.Box(spacing=6)
        self.add(self.main_divider)
        self.leftbar = LeftBar(actions=left_bar_actions,
                               left_bar_width=LEFT_BAR_WIDTH,
                               default_select=1)
        self.main_divider.pack_start(self.leftbar, False, True, 0)
        self.settings_view = MainStack([
            generalSettings(),
            Gtk.Label(label="Look"),
            Gtk.Label(label="Torrent"),
            Gtk.Label(label="Other")
        ])
        self.main_divider.pack_end(self.settings_view, True, True, 0)
Example no. 7
def gen_test_brevitas_finn_integration():
    test_finn_integration = Action(
        'Test Brevitas-FINN integration',
        EXCLUDE_LIST + FINN_INTEGRATION_EXCLUDE_LIST_EXTRA,
        MATRIX,
        FINN_INTEGRATION_STEP_LIST)
    test_finn_integration.gen_yaml(BASE_YML_TEMPLATE, FINN_INTEGRATION_YML)
Example no. 8
def gen_test_brevitas_pyxir_integration():
    test_pyxir_integration = Action(
        'Test Brevitas-PyXIR integration',
        EXCLUDE_LIST + PYXIR_INTEGRATION_EXCLUDE_LIST_EXTRA,
        MATRIX,
        PYXIR_INTEGRATION_STEP_LIST)
    test_pyxir_integration.gen_yaml(BASE_YML_TEMPLATE, PYXIR_INTEGRATION_YML)
Example no. 9
def gen_test_develop_install_yml():
    test_develop_install = Action(
        'Test develop install',
        EXCLUDE_LIST,
        MATRIX,
        TEST_INSTALL_DEV_STEP_LIST)
    test_develop_install.gen_yaml(BASE_YML_TEMPLATE, DEVELOP_INSTALL_YML)
Example no. 10
 def move(self, board) -> Tuple[int, int, Action]:
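     # Keep prompting until the user enters a valid "row col action" triple.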
     try:
         row, col, action = input(PROMPT).split()
         return int(row), int(col), Action(int(action))
     except Exception as ex:
         print(colored('[ERROR]', 'red'), ex)
         return self.move(board)
Example no. 11
def parse_tree(dep_tree, robot: RobotSubject):
    ''' Parse a single dependency tree; it may yield several commands.
        The parsing consists of four steps, which separately determine:
            1. the actions of the robot
            2. the relations between the actions
            3. the items involved in each action
            4. the requirements of the items
    '''
    print("{} will do the following things:".format(robot.name))
    ## find the actions to be carried out by this robot
    ## example (('send', 'VB'), 'nsubj', ('Alice', 'NNP')), word "send" has a nominal subject "Alice"
    for ((governor, gov_pos), relation, (dependent, _)) in dep_tree:
        if relation == "nsubj":
            if dependent == robot.name and gov_pos == "VB":
                robot.action_list.append(Action(governor))

    # determine whether the actions are combined by "and" or "or" ("and" by default)
    robot_actions = [action.name for action in robot.action_list]
    for ((governor, gov_pos), relation, (dependent, _)) in dep_tree:
        if relation == "conj:or":
            if governor in robot_actions and dependent in robot_actions:
                robot.do_all = False

    ## find everything related to the verbs
    cmd_list = []
    for action in robot.action_list:
        action.parse_dep(dep_tree)
        cmd_list.extend(action.gen_command(print_result=True))

    return cmd_list
Example no. 12
    def register_step(self, observation: Observation, action: Action,
                      reward: Reward, next_observation: Observation,
                      done: bool, action_metadata: dict) -> 'Step':
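        # Accumulate running trajectory statistics (the return and, when 'log_pi' is
        # provided, the entropy estimate), then wrap the transition in a Step record.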

        self.trajectory_returns += reward
        if 'log_pi' in action_metadata:
            self.trajectory_entropy += action_metadata['log_pi'].reshape(1, -1)

        action_metadata.update(
            dict(returns=self.trajectory_returns,
                 entropy=self.trajectory_entropy))

        step = Step(state=observation.reshape(1, -1),
                    action=action.reshape(1, -1),
                    reward=np.array(reward, dtype=np.float32).reshape(1, -1),
                    next_state=next_observation.reshape(1, -1),
                    termination_masks=np.array(done,
                                               dtype='uint8').reshape(1, -1),
                    metadata=action_metadata)
        if self.step_count == 0:
            self.initialize_records(step)
        else:
            for stat, value in step.asdict().items():
                self.trajectory_buffer[stat].append(value)
            self.step_history.append(step)

        self.step_count += 1
        return step
Example no. 13
def distributions():
    conditions.clear()
    dice1 = 8
    dice2 = 6
    reroll_equal_to = []
    reroll_lowest = 0
    roll_min = 0
    drop_lowest = 0

    details = [
        dice1, dice2, reroll_equal_to, reroll_lowest, roll_min, drop_lowest
    ]

    d20 = d20Set(1)
    diceset = Diceset([(dice1, dice2)])
    action = Action(d20, 0, diceset, crit_numbers=[], fail_dmg_scale=0.5)
    stats = Statistics(action)
    stats.collect_statistics()
    dummystats.clear()
    dummystats.append(stats.report_statistics())

    return render_template('distributions.html',
                           imagepath='static/img/placeholder.png',
                           stats=dummystats,
                           conditions=conditions,
                           collecting=False)
Example no. 14
    def epsilon_greedy(self, state):
        """
        Epsilon-greedy exploration (epsilon decays with the state's visit count)
        """
        if state.terminal:
            min_num_action = 0
        else:
            min_num_action = min(self.N[state.dealer_card - 1, state.player_sum - 1, :]) 
                
        eps = self.N0 / (self.N0 + min_num_action)

#        print (eps)
        if random() < eps:
            return Action.getRandomAction()
        else:
            action_value = np.argmax(self.Q[state.dealer_card - 1, state.player_sum - 1,:])
            return Action.get_action(action_value)
Example no. 15
    def __init__(self):
        self.main_view = MainStack([
            Trending(),
            Gtk.Label(label="Apps"),
            Movies(),
            Gtk.Label(label="Songs"),
            Gtk.Label(label="Files"),
        ])
        left_bar_actions = [
            Action("Trending", "go-home",
                   lambda: switchStack(0, self.main_view)),
            Action("Apps", "view-grid",
                   lambda: switchStack(1, self.main_view)),
            Action("Movies", "media-tape",
                   lambda: switchStack(2, self.main_view)),
            Action("Songs", "media-optical-cd-audio",
                   lambda: switchStack(3, self.main_view)),
            Action("Files", "folder", lambda: switchStack(4, self.main_view)),
            # Action("Settings", "open-menu",
            #        lambda: system("python3 " + path.abspath("settings.py") + " " + argv[1])),
        ]
        Gtk.Window.__init__(self, title="SmartTV OpenSource")
        self.set_default_size(get_monitors()[0].width,
                              get_monitors()[0].height)
        self.main_divider = Gtk.Box(spacing=6)
        self.add(self.main_divider)
        self.leftbar = LeftBar(actions=left_bar_actions,
                               left_bar_width=LEFT_BAR_WIDTH)
        self.sidebar_divider = Gtk.Box(orientation=Gtk.Orientation.VERTICAL,
                                       spacing=6)

        self.sidebar_divider.pack_start(WeatherBox(), False, False, 0)
        self.sidebar_divider.pack_start(self.leftbar, True, True, 0)
        settings_list = Gtk.ListBox()
        settings_list.set_selection_mode(Gtk.SelectionMode.NONE)
        settings_list.insert(ListTile("Settings", "open-menu"), 0)
        settings_list.connect(
            "row-activated", lambda x, y: system("python3 " + path.abspath(
                "settings.py") + " " + argv[1]))

        self.sidebar_divider.pack_end(settings_list, False, False, 0)

        self.main_divider.pack_start(self.sidebar_divider, False, False, 0)

        self.main_divider.pack_end(self.main_view, True, True, 0)
Example no. 16
 def store_search_statistics(self, root: Node):
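     # Record the root's normalized child visit counts (the search policy target)
     # together with the root's value estimate.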
     sum_visits = sum(child.visit_count for child in root.children.values())
     action_space = (Action(index)
                     for index in range(self.action_space_size))
     self.child_visits.append([
         root.children[a].visit_count /
         sum_visits if a in root.children else 0 for a in action_space
     ])
     self.root_values.append(root.value())
Example no. 17
    def linear_sarsa(self, iters, lambda_, compare_to_monctecarlo=False):
        """
        Sarsa(lambda) with linear function approximation
        """
        if compare_to_monctecarlo:
            monte_carlo_iterations = 1000000
            env = Environment()
            agent = Agent(env)
            agent.monte_carlo_control(monte_carlo_iterations)
            Q_monte_carlo = agent.Q
            mse_all = []
            
        for episode in range(0, iters):
            E = np.zeros(self.number_of_features) 
            #initialize state and action          
            state = self.env.get_initial_state()
            reward = 0
            action = self.epsilon_greedy_linear_constant(state)
#            self.N[state.dealer_card - 1, state.player_sum - 1, Action.get_value(action)] += 1 
            while not state.terminal:                   
#                update number of visits
                self.N[state.dealer_card - 1, state.player_sum - 1, Action.get_value(action)] += 1              
                [reward, state_forward] = self.env.step(state, action)                 
                action_forward = self.epsilon_greedy_linear_constant(state_forward)  
                
                if not state_forward.terminal:
                    current_estimate = reward + self.estimate_Q(state_forward, action_forward)
                else:
                    current_estimate = reward
                    
                previous_estimate = self.estimate_Q(state, action)
                delta = current_estimate - previous_estimate

                E = np.add(E, self.get_feature_vector(state, action))
                step_size = 0.01                
                self.weights += step_size * delta * E
                E = lambda_ * E

                action = action_forward
                state = state_forward
            if compare_to_monctecarlo:
                mse_all.append(compute_mse(self.approximation_to_Q(), Q_monte_carlo))
  
        if compare_to_monctecarlo:
#            print (mse_all[-1])
            plt.plot(range(0, iters), mse_all, 'r-')
            plt.xlabel("episodes")
            plt.ylabel("MSE")
#            plt.title("lambda = 0")
            plt.show()
            
        for (dealer_sum, player_sum), value in np.ndenumerate(self.V):
            s = State(dealer_sum+1, player_sum+1)
            self.Q[dealer_sum, player_sum ,0] = np.dot(self.get_feature_vector(s, Action.hit), self.weights)
            self.Q[dealer_sum, player_sum ,1] = np.dot(self.get_feature_vector(s, Action.stick), self.weights)
            self.V[dealer_sum, player_sum] = max(self.estimate_Q(s,Action.hit), self.estimate_Q(s,Action.stick))
Example no. 18
def home():
    dice1 = 8
    dice2 = 6
    d20 = d20Set(1)
    diceset = Diceset([(dice1, dice2)])
    action = Action(d20, 0, diceset, crit_numbers=[], fail_dmg_scale=0.5)
    stats = Statistics(action)
    stats.collect_statistics()

    return render_template(
        'home.html'
    )  #, imagepath='static/img/placeholder.png', stats=stats.report_statistics())
Example no. 19
 def epsilon_greedy_linear_constant(self, state, eps_=0.1):
     """
     Epsilon-greedy exploration with a constant exploration rate epsilon
     """
     eps = eps_
     if random() < eps or state.terminal:
         return Action.getRandomAction()
     else:
         actionHit_value = sum(self.get_feature_vector(state, Action.hit) * self.weights)
         actionStick_value = sum(self.get_feature_vector(state, Action.stick) * self.weights)
         action = Action.hit if actionHit_value > actionStick_value else Action.stick
         return action
Example no. 20
    def addAction(self, jsonStr):
        tempAction = json.loads(jsonStr)  #Storing JSON in a python dictionary

        try:
            action = Action(tempAction['action'], tempAction['time'])
        except Exception as ex:
            logging.exception('Caught an error')
            print("Couldn't create Action object")
            print(
                "Added JSON object must have ONLY two keys: 'action' and 'time'"
            )
            return

        # If the action already exists in the actions dictionary, update its avgTime and
        # numActions; otherwise add it as a new entry.
        if self.__doesExist(tempAction):
            print("updating old action")
            self.actions[action.getAction()].updateAction(
                action.getTime()
            )  # Updating the current object in the dictionary to have proper numActions and avgTime
        else:
            print("inserting new action")
            self.actions.update({action.getAction(): action})
Example no. 21
 def next(self, action: Action = Action(), **kwargs):
     """
     Required method for getting next state, possibly given an action.
     Should only update the attributes of the class.
     """
     # YOUR CODE HERE
     self.terminal = kwargs.get('terminal', False)
     prop = action.properties
     if prop == 'nudge':
         self.properties[1] += 1
     else:
         self.properties[0] += 1
     return self
Example no. 22
def calculate():
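    # Clear previously rendered temporary plots, recompute statistics for every queued
    # condition, overlay their histograms in a single figure and render the results page.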
    root = os.path.dirname(__file__)
    temp_dir = os.path.join(root, 'static/img/temp/')
    files = os.listdir(temp_dir)
    for file in files:
        os.remove(os.path.join(temp_dir, file))

    alpha = 0.9**len(conditions)
    statistics = []
    i = 0
    for details in conditions:

        d20 = d20Set(1)
        diceset = Diceset([(details[0], details[1])], details[2], details[3],
                          details[4], details[5])
        action = Action(d20, 0, diceset, crit_numbers=[], fail_dmg_scale=0.0)
        stats = Statistics(action)
        stats.collect_statistics()
        stats.plot_histogram(alpha, str(i))
        i += 1
        statistics.append(stats.report_statistics())

    plotname = 'static/img/temp/'
    for detail in details:
        plotname = plotname + str(detail)
    plotname = plotname + '.png'

    copyconditions = conditions.copy()
    conditions.clear()

    plt.savefig(os.path.join(root, plotname))
    plt.clf()

    return render_template('distributions.html',
                           imagepath=plotname,
                           stats=statistics,
                           conditions=copyconditions,
                           collecting=False)
Example no. 23
    def select_action(self, state: RobotState):
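        # Patrol between connected waypoints, picking a new random neighbour once the
        # current target is reached; shoot whenever an enemy is detected.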
        action = Action()
        pos = state.pos
        vel = state.velocity
        angle = state.angle
        if state.detect:
            action.shoot = +1.0
        else:
            action.shoot = 0.0

        if ((pos[0]-self.target[0])**2 + (pos[1]-self.target[1])**2 < 0.1):
            self.index = random.choice(self.connected[self.index])
            self.target = self.avaiable_pos[self.index]
            #print(self.target)
            #self.index = (self.index + 1) % len(self.path)
            #self.target = self.path[self.index]

        v, omega = self.move.moveTo(pos, vel, angle, self.target)
        action.v_t = v[0]
        action.v_n = v[1]
        action.omega = omega
        return action
Example no. 24
def get_action_from_args(request_args: dict) -> Action:
    """Define how to map request arguments to an Action."""
    # YOUR CODE HERE
    return Action()
Example no. 25
 def __init__(self, label, context, etype, query, parent=None):
     Action.__init__(self, label, context)
     self.etype = etype
     self.query = query
Example no. 26
 def __init__(self, context, parent=None):
     Action.__init__(self, u'Lista', context)
Example no. 27
def test_step_with_kinematic():
    env_config = configparser.RawConfigParser()
    env_config.read('configs/test_env.config')
    env_config.set('agent', 'kinematic', 'true')
    test_env = ENV(env_config, phase='test')
    test_env.reset()

    # test state computation
    states, rewards, done_signals = test_env.step((Action(1, 0), Action(1, 0)))
    assert np.allclose(
        states[0], JointState(-1, 0, 1, 0, 0.3, 2, 0, 1.0, 0, 1, 0, -1, 0,
                              0.3))
    assert np.allclose(
        states[1],
        JointState(1, 0, -1, 0, 0.3, -2, 0, 1.0, np.pi, -1, 0, 1, 0, 0.3))
    assert rewards == [0, 0]
    assert done_signals == [False, False]

    # test one-step lookahead
    reward, end_time = test_env.compute_reward(0, [Action(1.5, 0), None])
    assert reward == -0.25
    assert end_time == 1

    reward, end_time = test_env.compute_reward(
        0, [Action(1.5, 0), Action(1.5, 0)])
    assert reward == -0.25
    assert end_time == 0.5

    # test collision detection
    states, rewards, done_signals = test_env.step((Action(1, 0), Action(1, 0)))
    assert np.allclose(
        states[0], JointState(0, 0, 1, 0, 0.3, 2, 0, 1.0, 0, 0, 0, -1, 0, 0.3))
    assert np.allclose(
        states[1],
        JointState(0, 0, -1, 0, 0.3, -2, 0, 1.0, np.pi, 0, 0, 1, 0, 0.3))
    assert rewards == [-0.25, -0.25]
    assert done_signals == [2, 2]

    # test reaching goal
    test_env = ENV(env_config, phase='test')
    test_env.reset()
    test_env.step((Action(1, np.pi / 2), Action(2, np.pi / 2)))
    test_env.step((Action(4, -np.pi / 2), Action(4, -np.pi / 2)))
    states, rewards, done_signals = test_env.step(
        (Action(1, -np.pi / 2), Action(2, -np.pi / 2)))
    assert rewards == [1, 1]
    assert done_signals == [1, 1]
Example no. 28
    def td_learning(self, iters, lambda_, compare_to_monctecarlo=False, trace=Trace.accumulating):
        """
        Sarsa(lambda) algorithm with eligibility traces
        """
        if compare_to_monctecarlo:
            monte_carlo_iterations = 1000000
            env = Environment()
            agent = Agent(env)
            agent.monte_carlo_control(monte_carlo_iterations)
            Q_monte_carlo = agent.Q
            mse_all = []
            
        for episode in range(0, iters):
            E = np.zeros((self.env.dealer_values, self.env.player_values, self.env.action_values))
            
            #initialize state and action          
            state = self.env.get_initial_state()
            reward = 0
            action = self.epsilon_greedy(state)
            while not state.terminal:                   
#                update number of visits
                self.N[state.dealer_card - 1, state.player_sum - 1, Action.get_value(action)] += 1              
                [reward, state_forward] = self.env.step(state, action)                 
                action_forward = self.epsilon_greedy(state_forward)  
                
                if not state_forward.terminal:
                    current_estimate = reward + self.Q[state_forward.dealer_card - 1, state_forward.player_sum - 1, Action.get_value(action_forward)]
                else:
                    current_estimate = reward
                    
                previous_estimate = self.Q[state.dealer_card - 1, state.player_sum - 1, Action.get_value(action)]
                delta = current_estimate - previous_estimate
                
                step_size = 1.0 / self.N[state.dealer_card - 1, state.player_sum - 1, Action.get_value(action)]
                if trace == Trace.accumulating:
                    E[state.dealer_card - 1, state.player_sum - 1, Action.get_value(action)] += 1
                elif trace == Trace.replacing:
                    E[state.dealer_card - 1, state.player_sum - 1, Action.get_value(action)] = 1
                elif trace == Trace.dutch:
                    E[state.dealer_card - 1, state.player_sum - 1, Action.get_value(action)] = E[state.dealer_card - 1, state.player_sum - 1, Action.get_value(action)] + step_size*(1 - E[state.dealer_card - 1, state.player_sum - 1, Action.get_value(action)])

                if trace == Trace.dutch:
                    self.Q += delta * E
                else:
                    self.Q += step_size * delta * E
                E = lambda_ * E
              
                action = action_forward
                state = state_forward
            
            if compare_to_monctecarlo:
                mse_all.append(compute_mse(self.Q, Q_monte_carlo))
  
        if compare_to_monctecarlo:
#            print (mse_all[-1])
            plt.plot(range(0, iters), mse_all, 'r-')
            plt.xlabel("episodes")
            plt.ylabel("MSE")
#            plt.title("lambda = 1")
            plt.show()
                                 
        #update policy based on action-value function
        for (dealer_sum, player_sum), value in np.ndenumerate(self.V):
            self.V[dealer_sum, player_sum] = max(self.Q[dealer_sum, player_sum, :])
Example no. 29
if args.load_model:
    agent.load_model(args.load_model_path)
if args.enemy == "hand":
    agent2 = HandAgent()
elif args.enemy == "AC":
    agent2 = ActorCriticAgent()
    agent2.load_model(args.load_model_path)

env = ICRABattleField()
env.seed(args.seed)
losses = []
rewards = []
for i_episode in range(1, args.epoch + 1):
    print("Epoch: [{}/{}]".format(i_episode, args.epoch))
    # Initialize the environment and state
    action = Action()
    pos = env.reset()
    if args.enemy == "hand":
        agent2.reset(pos)
    state, reward, done, info = env.step(action)
    for t in range(2 * 60 * 30):
        # Other agent
        if args.enemy == "hand":
            env.set_robot_action(ID_B1, agent2.select_action(state[ID_B1]))
        elif args.enemy == "AC":
            env.set_robot_action(ID_B1, agent2.select_action(
                state[ID_B1], mode="max_probability"))

        # Select and perform an action
        state_map = agent.preprocess(state[ID_R1])
        a_m, a_t = agent.run_AC(state_map)
Example no. 30
def gen_test_brevitas_xir_integration():
    test_xir_integration = Action('Test Brevitas-XIR integration', [], MATRIX,
                                  XIR_INTEGRATION_STEP_LIST)
    test_xir_integration.gen_yaml(VITIS_AI_BASE_YML_TEMPLATE,
                                  XIR_INTEGRATION_YML)