Code example #1
    def __init__(self, parent_logger):
        """

        :type parent_logger: logging.Logger
        """
        self.file_search_paths = []
        self.services = []
        self.__artifacts = []
        self.reporters = []
        self.artifacts_dir = None
        self.log = parent_logger.getChild(self.__class__.__name__)
        self.env = Environment(self.log, dict(os.environ))
        self.shared_env = Environment(self.log)
        self.config = Configuration()
        self.config.log = self.log.getChild(Configuration.__name__)
        self.modules = {}  # available modules
        self.provisioning = Provisioning()
        self.aggregator = Aggregator(is_functional=False)
        self.interrupted = False
        self.check_interval = 1
        self.stopping_reason = None
        self.engine_loop_utilization = 0
        self.prepared = []
        self.started = []

        self.default_cwd = None
        self.logging_level_down = lambda: None
        self.logging_level_up = lambda: None

        self._http_client = None
Code example #2
File: AStar.py Project: AmanVirmani/Astar_Turtlebot3
    def Astar(self):
        search = []
        # Set current node to start and add start node to the node list and node search dictionary
        CurrentNode = Node(self.start, self.start, self.goal, self.stepSize)
        NodeList = [CurrentNode]
        NodeDict = {tuple(CurrentNode.env)}
        search.append(CurrentNode)
        # Check if the current node is the goal node
        while sqrt((CurrentNode.env[0] - self.goal[0]) ** 2 + (CurrentNode.env[1] - self.goal[1]) ** 2) > 1.5:

            # Keep checking if there are nodes in list
            if len(NodeList) > 0:
                # Pop the cheapest node from the end of the list (kept sorted in descending order of cost)
                CurrentNode = NodeList.pop()

                Course = Environment(CurrentNode.env, self.clearance)
                # Check all of the possible actions
                for action in Course.possibleMoves(self.start, CurrentNode, self.stepSize):

                    # Search the dictionary and add the node to the list and dictionary if it hasn't been explored yet
                    if tuple((int(action.env[0]), int(action.env[1]), action.env[2])) not in NodeDict:
                        NodeList.append(action)
                        search.append(action)
                        NodeDict.add(tuple((int(action.env[0]), int(action.env[1]), action.env[2])))
                # Sort list of nodes based on cost
                NodeList.sort(key=lambda x: x.weight, reverse=True)

            else:
                return -1, CurrentNode.path(), search
        # solve for path
        path = list(CurrentNode.path())
        return 0, path, search
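
A side note on the open list in this A* variant: `NodeList.sort(..., reverse=True)` followed by `pop()` retrieves the cheapest node, at the cost of a full sort per expansion. A min-heap gives the same selection more cheaply; the self-contained sketch below (with made-up weights, not the project's `Node` class) shows the equivalence.

import heapq

# Toy stand-ins for nodes: (weight, label) tuples ordered by weight.
nodes = [(7.0, "a"), (2.0, "b"), (5.0, "c")]

# Pattern used in Astar(): sort descending, then pop the cheapest from the end.
open_list = list(nodes)
open_list.sort(key=lambda n: n[0], reverse=True)
assert open_list.pop() == (2.0, "b")

# Equivalent min-heap: heapify once, pop the cheapest in O(log n) per step.
heap = list(nodes)
heapq.heapify(heap)
assert heapq.heappop(heap) == (2.0, "b")
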
Code example #3
File: Game.py Project: thyarles/simple-muzero
 def __init__(self, action_space_size: int, discount: float):
     self.environment = Environment()  # Game specific environment.
     self.history = []
     self.rewards = []
     self.child_visits = []
     self.root_values = []
     self.action_space_size = action_space_size
     self.discount = discount
Code example #4
    def start_subprocess(self, args, cwd, stdout, stderr, stdin, shell, env):
        if cwd is None:
            cwd = self.default_cwd

        env = Environment(self.log, env.get())
        env.set(self.shared_env.get())

        return shell_exec(args, cwd=cwd, stdout=stdout, stderr=stderr, stdin=stdin, shell=shell, env=env.get())
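
`start_subprocess` above copies the per-call environment and overlays the shared one, so shared values win on key collisions before the process is spawned. A minimal standard-library sketch of the same merge order (the variable names here are illustrative, not part of bzt):

import os
import subprocess

# Start from the caller-supplied environment (here: a copy of os.environ),
# then overlay shared values so they take precedence on collisions.
child_env = dict(os.environ)
child_env.update({"SHARED_FLAG": "1"})  # analogous to env.set(self.shared_env.get())

subprocess.Popen(
    ["python", "-c", "import os; print(os.environ['SHARED_FLAG'])"],
    env=child_env,
).wait()
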
Code example #5
    def Astar(self):
        search = []
        # Set current node to start and add start node to the node list and node search dictionary
        CurrentNode = Node(self.start, self.start, self.goal, self.stepSize)
        NodeList = [CurrentNode]
        NodeDict = {tuple(CurrentNode.env)}
        search.append(CurrentNode)
        # Check if the current node is the goal node
        while sqrt((CurrentNode.env[0] - self.goal[0])**2 +
                   (CurrentNode.env[1] - self.goal[1])**2) > 1.5:

            # Keep checking if there are nodes in list
            if len(NodeList) > 0:
                # Pop the cheapest node from the end of the list (kept sorted in descending order of cost)
                CurrentNode = NodeList.pop()

                Course = Environment(CurrentNode.env, self.clearance)
                # Check all of the possible nodes
                for node in Course.possibleMoves(self.start, CurrentNode,
                                                 self.stepSize):

                    # Search the dictionary and add the node to the list and dictionary if it hasn't been explored yet
                    if tuple((int(node.env[0]), int(node.env[1]),
                              node.env[2])) not in NodeDict:
                        NodeList.append(node)
                        search.append(node)
                        NodeDict.add(
                            tuple((int(node.env[0]), int(node.env[1]),
                                   node.env[2])))
                        sub_nodes = node.sub_nodes
                        for i in range(len(sub_nodes) - 1):
                            cv2.line(self.map,
                                     (int(sub_nodes[i][0] * 10),
                                      1020 - int(sub_nodes[i][1] * 10)),
                                     (int(sub_nodes[i + 1][0] * 10),
                                      1020 - int(sub_nodes[i + 1][1] * 10)),
                                     (0, 255, 0))
                        self.video_output.write(self.map)
                # Sort list of nodes based on cost
                NodeList.sort(key=lambda x: x.cost, reverse=True)

            else:
                return -1, CurrentNode.path(), search
        # solve for path
        path = list(CurrentNode.path())
        return 0, path, search
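
The drawing loop above issues one `cv2.line` call per sub-node segment. When the segments form a single chain, `cv2.polylines` can draw them in one call; a self-contained sketch with a toy canvas and points (the scaling and y-flip mirror the snippet):

import numpy as np
import cv2

canvas = np.zeros((1020, 1020, 3), dtype=np.uint8)
sub_nodes = [(10.0, 10.0), (20.0, 30.0), (40.0, 35.0)]

# Same transform as the snippet: scale by 10, flip y around the 1020-pixel height.
pts = np.array([(int(x * 10), 1020 - int(y * 10)) for x, y in sub_nodes],
               dtype=np.int32).reshape(-1, 1, 2)
cv2.polylines(canvas, [pts], isClosed=False, color=(0, 255, 0))
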
Code example #6
def build_env(filename, vector_dim, window_size=5, batch_size=20, lr=.1):
    """
        this function builds a training environment, as defined in the file `utils.py`;
        components for handling data are defined in the file `data.py`,
        and the model itself is defined in the file `model.py`.
    """
    # compute vocabulary
    print("compute vocabulary")
    itos, stoi = compute_vocabulary(filename, pad_token=PAD_TOK)
    # compute data iterator
    print("compute data iterator")
    data_iter = iter_examples(filename,
                              stoi,
                              window_size=window_size,
                              batch_size=batch_size)
    # build CBOW model

    print("build CBOW")
    model = Word2VecCBOWModel(vocab_size=len(itos),
                              vector_dim=vector_dim,
                              pad_idx=stoi[PAD_TOK])
    # initialize optimization algorithm
    print("compute optimizer")
    optim = torch.optim.SGD(model.parameters(), lr=lr)
    # define loss function
    print("compute criterion")
    criterion = torch.nn.NLLLoss()
    # build full environment
    return Environment(itos, stoi, data_iter, model, optim, criterion)
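
A sketch of how `build_env` might be driven. The attribute names on the returned `Environment` (`data_iter`, `model`, `optim`, `criterion`) are assumed from the constructor call above, and the filename is made up:

env = build_env("corpus.txt", vector_dim=100, window_size=5, batch_size=20, lr=0.1)

for x_batch, y_batch in env.data_iter:   # assumed to yield (context, target) batches
    env.optim.zero_grad()
    log_probs = env.model(x_batch)       # CBOW forward pass, expected to return log-probs
    loss = env.criterion(log_probs, y_batch)
    loss.backward()
    env.optim.step()
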
Code example #7
def test_env_update(shell_type, tmpdir):
    # check updating a package when a newer version is available
    with Environment(shell_type) as env:
        # first install an older version
        version = "1.25.11"
        config_a = tmpdir / "a.yml"
        config_a.write(
            f"""
            dependencies:
             - python
             - urllib3={version}
            """
        )
        env.mamba(f"env update -q -f {config_a}")
        out = env.execute('python -c "import urllib3; print(urllib3.__version__)"')

        # check that the installed version is the old one
        assert out[-1] == version

        # then release the pin
        config_b = tmpdir / "b.yml"
        config_b.write(
            """
            dependencies:
             - urllib3
            """
        )
        env.mamba(f"env update -q -f {config_b}")
        out = env.execute('python -c "import urllib3; print(urllib3.__version__)"')
        # check that the installed version is newer
        assert StrictVersion(out[-1]) > StrictVersion(version)
Code example #8
def test_track_features(shell_type):
    with Environment(shell_type) as env:
        # should install CPython since PyPy has track features
        version = "3.7.9"
        env.mamba(
            f'install -q -y "python={version}" --strict-channel-priority -c conda-forge'
        )
        out = env.execute('python -c "import sys; print(sys.version)"')

        if platform.system() == "Windows":
            assert out[-1].startswith(version)
            assert "[MSC v." in out[-1]
        elif platform.system() == "Linux":
            assert out[-2].startswith(version)
            assert out[-1].startswith("[GCC")
        else:
            assert out[-2].startswith(version)
            assert out[-1].startswith("[Clang")

        if platform.system() == "Linux":
            # now force PyPy install
            env.mamba(
                f'install -q -y "python={version}=*pypy" --strict-channel-priority -c conda-forge'
            )
            out = env.execute('python -c "import sys; print(sys.version)"')
            assert out[-2].startswith(version)
            assert out[-1].startswith("[PyPy")
Code example #9
    def __init__(self, args):

        # which Atari environment to load (an OpenAI Gym environment id)
        self.env_id = "PongNoFrameskip-v4"
        # create the environment
        self.env = Environment(self.env_id)

        # part of the q-value formula
        self.discount_factor = 0.99
        self.batch_size = 64
        # how often to update the network (backpropagation)
        self.update_frequency = 4
        # how often to synchronize with the target network
        self.target_network_update_freq = 1000

        # keeps track of the frames for training, and retrieves them in batches 
        self.agent_history_length = 4
        self.memory = ReplayMemory(capacity=10000, batch_size=self.batch_size)

        # two neural networks. One for main and one for target
        self.main_network = PongNetwork(num_actions=self.env.get_action_space_size(), agent_history_length=self.agent_history_length)
        self.target_network = PongNetwork(num_actions=self.env.get_action_space_size(), agent_history_length=self.agent_history_length)
        
        # adam optimizer. just a standard procedure
        self.optimizer = Adam(learning_rate=1e-4, epsilon=1e-6)
        # we start with a high exploration rate then slowly decrease it
        self.init_explr = 1.0
        self.final_explr = 0.1
        self.final_explr_frame = 1000000
        self.replay_start_size = 10000

        # metrics for the loss 
        self.loss = tf.keras.losses.Huber()
        # running mean of the training loss
        self.loss_metric = tf.keras.metrics.Mean(name="loss")
        # running mean of the Q-values from the update steps
        self.q_metric = tf.keras.metrics.Mean(name="Q_value")

        # max number of frames to train for; training will likely stop before reaching it
        self.training_frames = int(1e7)

        # path to save the checkpoints, logs and the weights
        self.checkpoint_path = "./checkpoints/" + args.run_name
        self.tensorboard_writer = tf.summary.create_file_writer(self.checkpoint_path + "/runs/")
        self.print_log_interval = 10
        self.save_weight_interval = 10
        self.env.reset()
Code example #10
File: test_all.py Project: xhochy/mamba
def test_track_features():
    with Environment() as env:
        # should install CPython since PyPy has track features
        version = '3.6.9'
        env.execute(f'$MAMBA install -q -y python={version}')
        out = env.execute('python -c "import sys; print(sys.version)"')
        assert out[-2].startswith(version)
        assert out[-1].startswith('[GCC')

        # now force PyPy install
        env.execute(f'$MAMBA install -q -y python={version}=*pypy')
        out = env.execute('python -c "import sys; print(sys.version)"')
        assert out[-2].startswith(version)
        assert out[-1].startswith('[PyPy')
Code example #11
def main(kwargs_list):
    func = run_job_for_env
    schedule = []
    for kwargs in kwargs_list:
        job = (func, [], kwargs)
        schedule.append(job)
    ENV = Environment(schedule)
    ENV.run()
    ENV.save_states('states.dmp')
Code example #12
File: test_all.py Project: rowhit/mamba-1
def test_update():
    # check updating a package when a newer version is available
    with Environment() as env:
        # first install an older version
        version = "1.25.7"
        env.execute(f"$MAMBA install -q -y urllib3={version}")
        out = env.execute('python -c "import urllib3; print(urllib3.__version__)"')
        # check that the installed version is the old one
        assert out[-1] == version

        # then update package
        env.execute("$MAMBA update -q -y urllib3")
        out = env.execute('python -c "import urllib3; print(urllib3.__version__)"')
        # check that the installed version is newer
        assert StrictVersion(out[-1]) > StrictVersion(version)
Code example #13
File: test_all.py Project: rowhit/mamba-1
def test_track_features():
    with Environment() as env:
        # should install CPython since PyPy has track features
        version = "3.6.9"
        env.execute(
            f'$MAMBA install -q -y "python={version}" --strict-channel-priority'
        )
        out = env.execute('python -c "import sys; print(sys.version)"')
        assert out[-2].startswith(version)
        assert out[-1].startswith("[GCC")

        # now force PyPy install
        env.execute(
            f'$MAMBA install -q -y "python={version}=*pypy" --strict-channel-priority'
        )
        out = env.execute('python -c "import sys; print(sys.version)"')
        assert out[-2].startswith(version)
        assert out[-1].startswith("[PyPy")
Code example #14
File: test_all.py Project: syslaila/mamba
def test_update_py():
    # check updating a package when a newer version is available
    if platform.system() == "Windows":
        shell_type = "cmd.exe"
    else:
        shell_type = "bash"

    with Environment(shell_type) as env:
        env.mamba(f'install -q -y "python=3.8" pip -c conda-forge')
        out = env.execute('python -c "import sys; print(sys.version)"')
        assert "3.8" in out[0]

        out = env.execute('python -c "import pip; print(pip.__version__)"')
        assert len(out)

        env.mamba(f'install -q -y "python=3.9" -c conda-forge')
        out = env.execute('python -c "import sys; print(sys.version)"')
        assert "3.9" in out[0]
        out = env.execute('python -c "import pip; print(pip.__version__)"')
        assert len(out)
Code example #15
class Execute:
    def __init__(self, path):
        self.config = Configuration.construct(path)
        self.env = Environment(self.config)
        self.memory = ReplayMemory(self.config)
        self.model = Model(self.config)
        self.ep = None

    def get_epsilon(self, is_play):
        if is_play:
            return self.config.play.ep
        ep_start = self.config.train.ep.start
        ep_final = self.config.train.ep.final
        ep_num_frames = self.config.train.ep.num_frames
        decay = (ep_start - ep_final) / ep_num_frames
        if self.ep is None:
            self.ep = ep_start
        self.ep = max(self.ep - decay, ep_final)
        return self.ep

    def log(self, **kwargs):
        log = ""
        for name, value in kwargs.items():
            log += f"{name}: {value}, "
        print(log)

    def run_episode(self, episode=1, steps=0, is_play=True, debug=False):
        config = self.config

        self.env.reset()
        action = 1
        _, _, curr_state, is_done = self.env.step(action)
        total_reward = 0
        update_net = 0
        C = config.train.network_update_freq
        t = 0
        T = config.max_episode_length

        while not is_done and t < T:
            if t % config.action_repeat == 0:
                ep = self.get_epsilon(is_play)
                action = self.model.choose_action(curr_state, ep)
            prev_state, reward, curr_state, is_done = self.env.step(action)
            total_reward += reward
            t += 1

            if is_play:
                self.env.render("human")
                if debug and t % config.play.debug.time == 0:
                    self.log(ftype=self.env.get_frame_type(), action=action, reward=total_reward)
                continue

            self.memory.add((prev_state, action, reward, curr_state, is_done))
            if self.memory.get_size() > config.train.replay_start_size:
                for i in range(config.train.batch_run):
                    batch = self.memory.sample()
                    self.model.optimize(batch)
                    steps = (steps + 1) % C
                if steps % C == 0:
                    self.model.update_qhat()
                    update_net += 1

        if not is_play and debug and episode % config.train.debug.time == 0:
            self.log(ftype=self.env.get_frame_type(), total_reward=total_reward, network_update_steps=update_net, episode_time=t, ep=ep)

        return total_reward, steps

    def load_model(self):
        ftype = self.env.get_frame_type()
        in_size = self.env.get_in_size()
        num_actions = self.env.get_num_actions()
        self.model.load_model(ftype, in_size, num_actions)

    def play(self, debug=False):
        self.load_model()
        for ep in range(1):
            self.run_episode(is_play=True, debug=debug)

    def train(self, debug=False):
        self.load_model()
        optimize_steps = 0
        episodes = self.config.train.episodes
        for episode in range(1, episodes+1):
            reward, steps = self.run_episode(episode=episode, steps=optimize_steps, is_play=False, debug=debug)
            optimize_steps += steps
            if episode % self.config.train.save_model_episode == 0:
                self.model.save_model()
        self.model.update_qhat()
        self.model.save_model()

    def close(self):
        self.env.close()
        self.memory.close()
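
`get_epsilon` above decays epsilon linearly by a fixed amount per call until it reaches the configured floor. A self-contained recomputation of that schedule, with toy numbers standing in for `config.train.ep.{start,final,num_frames}`:

ep_start, ep_final, ep_num_frames = 1.0, 0.1, 9
decay = (ep_start - ep_final) / ep_num_frames   # 0.1 less per call

ep = ep_start
values = []
for _ in range(12):
    ep = max(ep - decay, ep_final)
    values.append(round(ep, 2))

# Linear ramp down, then clamped at the floor:
assert values == [0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1, 0.1, 0.1, 0.1]
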
Code example #16
File: Game.py Project: thyarles/simple-muzero
class Game(object):
    """A single episode of interaction with the environment."""
    def __init__(self, action_space_size: int, discount: float):
        self.environment = Environment()  # Game specific environment.
        self.history = []
        self.rewards = []
        self.child_visits = []
        self.root_values = []
        self.action_space_size = action_space_size
        self.discount = discount

    def terminal(self) -> bool:
        # Game specific termination rules.
        pass

    def legal_actions(self) -> List[Action]:
        # Game specific calculation of legal actions.
        return []

    def apply(self, action: Action):
        reward = self.environment.step(action)
        self.rewards.append(reward)
        self.history.append(action)

    def store_search_statistics(self, root: Node):
        sum_visits = sum(child.visit_count for child in root.children.values())
        action_space = (Action(index)
                        for index in range(self.action_space_size))
        self.child_visits.append([
            root.children[a].visit_count /
            sum_visits if a in root.children else 0 for a in action_space
        ])
        self.root_values.append(root.value())

    def make_image(self, state_index: int):
        # Game specific feature planes.
        return []

    def make_target(self, state_index: int, num_unroll_steps: int,
                    td_steps: int, to_play: Player):
        # The value target is the discounted root value of the search tree N steps
        # into the future, plus the discounted sum of all rewards until then.
        targets = []
        for current_index in range(state_index,
                                   state_index + num_unroll_steps + 1):
            bootstrap_index = current_index + td_steps
            if bootstrap_index < len(self.root_values):
                value = self.root_values[
                    bootstrap_index] * self.discount**td_steps
            else:
                value = 0

            if self.rewards:
                for i, reward in enumerate(
                        self.rewards[current_index:bootstrap_index]):
                    value += reward * self.discount**i  # pytype: disable=unsupported-operands

            if current_index < len(self.root_values):
                if self.rewards:
                    targets.append((value, self.rewards[current_index],
                                    self.child_visits[current_index]))
                else:
                    targets.append(
                        (value, 0, self.child_visits[current_index]))
            else:
                # States past the end of games are treated as absorbing states.
                targets.append((0, 0, []))
        return targets

    def to_play(self) -> Player:
        return Player()

    def action_history(self) -> ActionHistory:
        return ActionHistory(self.history, self.action_space_size)
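
The `make_target` docstring describes the value target as the discounted root value `td_steps` ahead plus the discounted sum of rewards until then. A self-contained numeric check of that formula on toy lists (no MuZero classes required):

discount, td_steps = 0.9, 2
root_values = [1.0, 2.0, 3.0, 4.0]
rewards = [0.5, 0.5, 0.5, 0.5]

current_index = 0
bootstrap_index = current_index + td_steps                    # 2
value = root_values[bootstrap_index] * discount ** td_steps   # 3.0 * 0.81 = 2.43
for i, reward in enumerate(rewards[current_index:bootstrap_index]):
    value += reward * discount ** i                           # + 0.5 + 0.45
assert abs(value - 3.38) < 1e-9
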
Code example #17
class Agent:
    
    def __init__(self, args):

        # which Atari environment to load (an OpenAI Gym environment id)
        self.env_id = "PongNoFrameskip-v4"
        # create the environment
        self.env = Environment(self.env_id)

        # part of the q-value formula
        self.discount_factor = 0.99
        self.batch_size = 64
        # how often to update the network (backpropagation)
        self.update_frequency = 4
        # how often to synchronize with the target network
        self.target_network_update_freq = 1000

        # keeps track of the frames for training, and retrieves them in batches 
        self.agent_history_length = 4
        self.memory = ReplayMemory(capacity=10000, batch_size=self.batch_size)

        # two neural networks. One for main and one for target
        self.main_network = PongNetwork(num_actions=self.env.get_action_space_size(), agent_history_length=self.agent_history_length)
        self.target_network = PongNetwork(num_actions=self.env.get_action_space_size(), agent_history_length=self.agent_history_length)
        
        # adam optimizer. just a standard procedure
        self.optimizer = Adam(learning_rate=1e-4, epsilon=1e-6)
        # we start with a high exploration rate then slowly decrease it
        self.init_explr = 1.0
        self.final_explr = 0.1
        self.final_explr_frame = 1000000
        self.replay_start_size = 10000

        # metrics for the loss 
        self.loss = tf.keras.losses.Huber()
        # running mean of the training loss
        self.loss_metric = tf.keras.metrics.Mean(name="loss")
        # running mean of the Q-values from the update steps
        self.q_metric = tf.keras.metrics.Mean(name="Q_value")

        # max number of frames to train for; training will likely stop before reaching it
        self.training_frames = int(1e7)

        # path to save the checkpoints, logs and the weights
        self.checkpoint_path = "./checkpoints/" + args.run_name
        self.tensorboard_writer = tf.summary.create_file_writer(self.checkpoint_path + "/runs/")
        self.print_log_interval = 10
        self.save_weight_interval = 10
        self.env.reset()
           

    # calculate the network loss on the replay buffer (Q-learning)
    def update_main_q_network(self, state_batch, action_batch, reward_batch, next_state_batch, terminal_batch):
       
        with tf.GradientTape() as tape:
            ## THIS IS WHERE THE MAGIC HAPPENS!
            ## L = Q(s, a) - (r + discount_factor* Max Q(s’, a))
            next_state_q = self.target_network(next_state_batch)
            next_state_max_q = tf.math.reduce_max(next_state_q, axis=1)
            expected_q = reward_batch + self.discount_factor * next_state_max_q * (1.0 - tf.cast(terminal_batch, tf.float32))
            main_q = tf.reduce_sum(self.main_network(state_batch) * tf.one_hot(action_batch, self.env.get_action_space_size(), 1.0, 0.0), axis=1)
            loss = self.loss(tf.stop_gradient(expected_q), main_q)

        gradients = tape.gradient(loss, self.main_network.trainable_variables)
        clipped_gradients = [tf.clip_by_norm(grad, 10) for grad in gradients]
        self.optimizer.apply_gradients(zip(clipped_gradients, self.main_network.trainable_variables))

        self.loss_metric.update_state(loss)
        self.q_metric.update_state(main_q)

        return loss

    
    # calculate the network loss on the replay buffer (Double Q-learning)
    def update_main_dq_network(self, state_batch, action_batch, reward_batch, next_state_batch, terminal_batch):
        
        with tf.GradientTape() as tape:
            # THIS IS WHERE THE MAGIC HAPPENS!
            ## here we maintain two Q values: one to maximize the reward in the next state and one to update current state
            q_online = self.main_network(next_state_batch)  # Use q values from online network
            action_q_online = tf.math.argmax(q_online, axis=1)  # optimal actions from the q_online
            q_target = self.target_network(next_state_batch)  # Q-values from the target network
            ddqn_q = tf.reduce_sum(q_target * tf.one_hot(action_q_online, self.env.get_action_space_size(), 1.0, 0.0), axis=1)
            expected_q = reward_batch + self.discount_factor * ddqn_q * (1.0 - tf.cast(terminal_batch, tf.float32))  # Corresponds to equation (4) in ddqn paper
            main_q = tf.reduce_sum(self.main_network(state_batch) * tf.one_hot(action_batch, self.env.get_action_space_size(), 1.0, 0.0), axis=1)
            loss = self.loss(tf.stop_gradient(expected_q), main_q)

        gradients = tape.gradient(loss, self.main_network.trainable_variables)
        clipped_gradients = [tf.clip_by_norm(grad, 10) for grad in gradients]
        self.optimizer.apply_gradients(zip(clipped_gradients, self.main_network.trainable_variables))

        self.loss_metric.update_state(loss)
        self.q_metric.update_state(main_q)

        return loss



    # get the next action index based on the state (84,84,4) and exploration rate
    def get_action(self, state, exploration_rate):
        recent_state = tf.expand_dims(state, axis=0)
        if tf.random.uniform((), minval=0, maxval=1, dtype=tf.float32) < exploration_rate:
            action = tf.random.uniform((), minval=0, maxval=self.env.get_action_space_size(), dtype=tf.int32)
        else:
            q_value = self.main_network(tf.cast(recent_state, tf.float32))
            action = tf.cast(tf.squeeze(tf.math.argmax(q_value, axis=1)), dtype=tf.int32)
        return action
        
    
    # get the epsilon value for the current step. Similar to https://openai.com/blog/openai-baselines-dqn/
    def get_eps(self, current_step, terminal_eps=0.01, terminal_frame_factor=25):
    
        terminal_eps_frame = self.final_explr_frame * terminal_frame_factor

        if current_step < self.replay_start_size:
            eps = self.init_explr
        elif self.replay_start_size <= current_step < self.final_explr_frame:
            eps = (self.final_explr - self.init_explr) / (self.final_explr_frame - self.replay_start_size) * (current_step - self.replay_start_size) + self.init_explr
        elif self.final_explr_frame <= current_step < terminal_eps_frame:
            eps = (terminal_eps - self.final_explr) / (terminal_eps_frame - self.final_explr_frame) * (current_step - self.final_explr_frame) + self.final_explr
        else:
            eps = terminal_eps
        return eps
    
        
    # copy over the weights between the main and target network to synchronize
    def update_target_network(self):
        main_vars = self.main_network.trainable_variables
        target_vars = self.target_network.trainable_variables
        for main_var, target_var in zip(main_vars, target_vars):
            target_var.assign(main_var)

    def train(self, algorithm='q'):
    
        total_step = 0
        episode = 0
        latest_mean_score = -99.99
        latest_100_score = deque(maxlen=100)
        # this is somewhat arbitrary, but the best bots reach about 20 by the end of training in this game
        max_reward = 20.0

        # train until the mean reward reaches 20
        while latest_mean_score < max_reward:
            
            # reset the variable for the upcoming episode
            state = self.env.reset()
            episode_step = 0
            episode_score = 0.0
            done = False


            while not done:
                # while the episode is not done, calculate the epsilon and get the next action
                eps = self.get_eps(tf.constant(total_step, tf.float32))
                action = self.get_action(tf.constant(state), tf.constant(eps, tf.float32))
            
                next_state, reward, done, info = self.env.step(action)
                episode_score += reward

                self.memory.push(state, action, reward, next_state, done)
                state = next_state

                # update the network
                if (total_step % self.update_frequency == 0) and (total_step > self.replay_start_size):
                    indices = self.memory.get_minibatch_indices()
                    state_batch, action_batch, reward_batch, next_state_batch, terminal_batch = self.memory.generate_minibatch_samples(indices)
                    if algorithm == 'q':
                        self.update_main_q_network(state_batch, action_batch, reward_batch, next_state_batch, terminal_batch)
                    else:
                        self.update_main_dq_network(state_batch, action_batch, reward_batch, next_state_batch, terminal_batch)

                if (total_step % self.target_network_update_freq == 0) and (total_step > self.replay_start_size):
                    self.update_target_network()
                
                total_step += 1
                episode_step += 1

                if done:
                    latest_100_score.append(episode_score)
                    self.write_summary(episode, latest_100_score, episode_score, total_step, eps)
                    episode += 1

                    if episode % self.print_log_interval == 0:
                        print("Episode: ", episode)
                        print("Latest 100 avg: {:.4f}".format(np.mean(latest_100_score)))
                        print("Progress: {} / {} ( {:.2f} % )".format(total_step, self.training_frames, 
                        np.round(total_step / self.training_frames, 3) * 100))
                        latest_mean_score = np.mean(latest_100_score)

                    if episode % self.save_weight_interval == 0:
                        print("Saving weights...")
                        self.main_network.save_weights(self.checkpoint_path + "/weights/episode_{}".format(episode))


    # write the summaries back to the tensorboard
    def write_summary(self, episode, latest_100_score, episode_score, total_step, eps):

        with self.tensorboard_writer.as_default():
            tf.summary.scalar("Reward", episode_score, step=episode)
            tf.summary.scalar("Latest 100 avg rewards", np.mean(latest_100_score), step=episode)
            tf.summary.scalar("Loss", self.loss_metric.result(), step=episode)
            tf.summary.scalar("Average Q", self.q_metric.result(), step=episode)
            tf.summary.scalar("Total Frames", total_step, step=episode)
            tf.summary.scalar("Epsilon", eps, step=episode)

        self.loss_metric.reset_states()
        self.q_metric.reset_states()
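
The Double-DQN update in `update_main_dq_network` chooses the next action with the online network but evaluates it with the target network. A minimal numpy sketch of that target computation on a toy batch of two transitions (hand-written Q tables stand in for the networks):

import numpy as np

discount = 0.99
rewards = np.array([1.0, 0.0])
terminals = np.array([0.0, 1.0])                 # 1.0 marks the end of an episode

q_online = np.array([[0.2, 0.9], [0.7, 0.1]])    # online net's Q(s', .)
q_target = np.array([[0.3, 0.8], [0.6, 0.2]])    # target net's Q(s', .)

best_actions = np.argmax(q_online, axis=1)       # action selection: online network
ddqn_q = q_target[np.arange(2), best_actions]    # action evaluation: target network
expected_q = rewards + discount * ddqn_q * (1.0 - terminals)

# sample 0: 1.0 + 0.99 * 0.8 = 1.792; sample 1 is terminal, so only the reward
assert np.allclose(expected_q, [1.792, 0.0])
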
Code example #18
parser.add_argument('--n-bins',
                    type=int,
                    default=8,
                    help='number of bins for tile coding')
parser.add_argument('--ssp-scaling', type=float, default=1.0)
parser.add_argument('--dim', type=int, default=512)
parser.add_argument('--seed', type=int, default=13)

args = parser.parse_args()

encoding_func, dim = get_encoding_function(args,
                                           limit_low=args.limit_low,
                                           limit_high=args.limit_high)

env = Environment(encoding_func=encoding_func,
                  limit_low=args.limit_low,
                  limit_high=args.limit_high)

xs = np.linspace(0, 2.2, args.res)
ys = np.linspace(0, 2.2, args.res)

show_pca = True

if True:  # live updates
    n_steps = 1000

    # fig, ax = plt.subplots()

    pca = PCA(n_components=args.n_components)

    all_activations = None
Code example #19
class Engine(object):
    """
    Core entity of the technology, used to coordinate whole process

    :type reporters: list[Reporter]
    :type services: list[Service]
    :type log: logging.Logger
    :type aggregator: bzt.modules.aggregator.ConsolidatingAggregator
    :type stopping_reason: BaseException
    """
    ARTIFACTS_DIR = "%Y-%m-%d_%H-%M-%S.%f"

    def __init__(self, parent_logger):
        """

        :type parent_logger: logging.Logger
        """
        self.file_search_paths = []
        self.services = []
        self.__artifacts = []
        self.reporters = []
        self.artifacts_dir = None
        self.log = parent_logger.getChild(self.__class__.__name__)
        self.env = Environment(self.log, dict(os.environ))
        self.shared_env = Environment(self.log)
        self.config = Configuration()
        self.config.log = self.log.getChild(Configuration.__name__)
        self.modules = {}  # available modules
        self.provisioning = Provisioning()
        self.aggregator = Aggregator(is_functional=False)
        self.interrupted = False
        self.check_interval = 1
        self.stopping_reason = None
        self.engine_loop_utilization = 0
        self.prepared = []
        self.started = []

        self.default_cwd = None
        self.logging_level_down = lambda: None
        self.logging_level_up = lambda: None

        self._http_client = None

    def configure(self, user_configs, read_config_files=True):
        """
        Load configuration files
        :type user_configs: list[str]
        :type read_config_files: bool
        """
        self.log.info("Configuring...")

        if read_config_files:
            self._load_base_configs()

        merged_config = self._load_user_configs(user_configs)

        all_includes = []
        while "included-configs" in self.config:
            includes = self.config.pop("included-configs")
            included_configs = [self.find_file(conf) for conf in includes if conf not in all_includes + user_configs]
            all_includes += includes
            self.config.load(included_configs)
        self.config['included-configs'] = all_includes

        self.config.merge({"version": __init__.VERSION})
        self.get_http_client()

        if self.config.get(SETTINGS).get("check-updates", True):
            install_id = self.config.get("install-id", self._generate_id())

            def wrapper():
                return self._check_updates(install_id)

            thread = threading.Thread(target=wrapper)  # intentionally non-daemon thread
            thread.start()

        return merged_config

    def _generate_id(self):
        if os.getenv("JENKINS_HOME"):
            prefix = "jenkins"
        elif os.getenv("TRAVIS"):
            prefix = "travis"
        elif any([key.startswith("bamboo") for key in os.environ.keys()]):
            prefix = "bamboo"
        elif os.getenv("TEAMCITY_VERSION"):
            prefix = "teamcity"
        elif os.getenv("DOCKER_HOST"):
            prefix = "docker"
        elif os.getenv("AWS_"):
            prefix = "amazon"
        elif os.getenv("GOOGLE_APPLICATION_CREDENTIALS") or os.getenv("CLOUDSDK_CONFIG"):
            prefix = "google_cloud"
        elif os.getenv("WEBJOBS_NAME"):
            prefix = "azure"
        elif is_linux():
            prefix = 'linux'
        elif is_windows():
            prefix = 'windows'
        else:
            prefix = 'macos'

        return "%s-%x" % (prefix, uuid.getnode())

    def prepare(self):
        """
        Prepare engine for work: calls prepare() on Provisioning and the
        downstream EngineModule instances
        """
        self.log.info("Preparing...")
        interval = self.config.get(SETTINGS).get("check-interval", self.check_interval)
        self.check_interval = dehumanize_time(interval)

        try:
            self.__prepare_aggregator()
            self.__prepare_services()
            self.__prepare_provisioning()
            self.__prepare_reporters()
            self.config.dump()

        except BaseException as exc:
            self.stopping_reason = exc
            raise

    def _startup(self):
        modules = self.services + [self.aggregator] + self.reporters + [self.provisioning]  # order matters
        for module in modules:
            self.log.info("Startup %s", module)
            self.started.append(module)
            module.startup()
        self.config.dump()

    def start_subprocess(self, args, cwd, stdout, stderr, stdin, shell, env):
        if cwd is None:
            cwd = self.default_cwd

        env = Environment(self.log, env.get())
        env.set(self.shared_env.get())

        return shell_exec(args, cwd=cwd, stdout=stdout, stderr=stderr, stdin=stdin, shell=shell, env=env.get())

    def run(self):
        """
        Run the job. Calls `startup`, does periodic `check`,
        calls `shutdown` in any case
        """
        self.log.info("Starting...")
        exc_info = exc_value = None
        try:
            self._startup()
            self.logging_level_down()
            self._wait()
        except BaseException as exc:
            self.log.debug("%s:\n%s", exc, traceback.format_exc())
            self.stopping_reason = exc
            exc_info = sys.exc_info()
        finally:
            self.log.warning("Please wait for graceful shutdown...")
            try:
                self.logging_level_up()
                self._shutdown()
            except BaseException as exc:
                self.log.debug("%s:\n%s", exc, traceback.format_exc())
                if not self.stopping_reason:
                    self.stopping_reason = exc
                if not exc_info:
                    exc_info = sys.exc_info()
                if not exc_value:
                    exc_value = exc

        if exc_info:
            reraise(exc_info, exc_value)

    def _check_modules_list(self):
        stop = False
        modules = [self.provisioning, self.aggregator] + self.services + self.reporters  # order matters
        for module in modules:
            if module in self.started:
                self.log.debug("Checking %s", module)
                finished = bool(module.check())
                if finished:
                    self.log.debug("%s finished", module)
                    stop = finished
        return stop

    def _wait(self):
        """
        Wait for modules to finish
        :return:
        """
        prev = time.time()

        while not self._check_modules_list():
            now = time.time()
            diff = now - prev
            delay = self.check_interval - diff
            self.engine_loop_utilization = diff / self.check_interval
            self.log.debug("Iteration took %.3f sec, sleeping for %.3f sec...", diff, delay)
            if delay > 0:
                time.sleep(delay)
            prev = time.time()
            if self.interrupted:
                raise ManualShutdown()
        self.config.dump()

    def _shutdown(self):
        """
        Shutdown modules
        :return:
        """
        self.log.info("Shutting down...")
        self.log.debug("Current stop reason: %s", self.stopping_reason)
        exc_info = exc_value = None
        modules = [self.provisioning, self.aggregator] + self.reporters + self.services  # order matters
        for module in modules:
            try:
                if module in self.started:
                    module.shutdown()
            except BaseException as exc:
                self.log.debug("%s:\n%s", exc, traceback.format_exc())
                if not exc_info:
                    exc_info = sys.exc_info()
                if not exc_value:
                    exc_value = exc

        self.config.dump()
        if exc_info:
            reraise(exc_info, exc_value)

    def post_process(self):
        """
        Do post-run analysis and processing for the results.
        """
        self.log.info("Post-processing...")
        # :type exception: BaseException
        exc_info = exc_value = None
        modules = [self.provisioning, self.aggregator] + self.reporters + self.services  # order matters
        # services are last because of shellexec which is "final-final" action
        for module in modules:
            if module in self.prepared:
                try:
                    module.post_process()
                except BaseException as exc:
                    if isinstance(exc, KeyboardInterrupt):
                        self.log.debug("post_process: %s", exc)
                    else:
                        self.log.debug("post_process: %s\n%s", exc, traceback.format_exc())
                    if not self.stopping_reason:
                        self.stopping_reason = exc
                    if not exc_info:
                        exc_info = sys.exc_info()
                    if not exc_value:
                        exc_value = exc
        self.config.dump()

        if exc_info:
            reraise(exc_info, exc_value)

    def create_artifact(self, prefix, suffix):
        """
        Create new artifact in artifacts dir with given prefix and suffix

        :type prefix: str
        :type suffix: str
        :return: Path to created file
        :rtype: str
        :raise TaurusInternalException: if no artifacts dir set
        """
        if not self.artifacts_dir:
            raise TaurusInternalException("Cannot create artifact: no artifacts_dir set up")

        filename = get_uniq_name(self.artifacts_dir, prefix, suffix, self.__artifacts)
        self.__artifacts.append(filename)
        self.log.debug("New artifact filename: %s", filename)
        return filename

    def existing_artifact(self, filename, move=False, target_filename=None):
        """
        Add an existing artifact; it will be collected into artifact_dir. If
        move=True, the original file will be deleted

        :type filename: str
        :type move: bool
        :type target_filename: str
        """
        self.log.debug("Add existing artifact (move=%s): %s", move, filename)
        if self.artifacts_dir is None:
            self.log.warning("Artifacts dir has not been set, will not copy %s", filename)
            return

        new_filename = os.path.basename(filename) if target_filename is None else target_filename
        new_name = os.path.join(self.artifacts_dir, new_filename)
        self.__artifacts.append(new_name)

        if get_full_path(filename) == get_full_path(new_name):
            self.log.debug("No need to copy %s", filename)
            return

        if not os.path.exists(filename):
            self.log.warning("Artifact file not exists: %s", filename)
            return

        if move:
            self.log.debug("Moving %s to %s", filename, new_name)
            shutil.move(filename, new_name)
        else:
            self.log.debug("Copying %s to %s", filename, new_name)
            shutil.copy(filename, new_name)

    def create_artifacts_dir(self, existing_artifacts=(), merged_config=None):
        """
        Create directory for artifacts, directory name based on datetime.now()
        """
        if not self.artifacts_dir:
            artifacts_dir = self.config.get(SETTINGS, force_set=True).get("artifacts-dir", self.ARTIFACTS_DIR)
            self.artifacts_dir = datetime.datetime.now().strftime(artifacts_dir)

        self.artifacts_dir = get_full_path(self.artifacts_dir)

        self.log.info("Artifacts dir: %s", self.artifacts_dir)
        self.env.set({TAURUS_ARTIFACTS_DIR: self.artifacts_dir})
        os.environ[TAURUS_ARTIFACTS_DIR] = self.artifacts_dir

        if not os.path.isdir(self.artifacts_dir):
            os.makedirs(self.artifacts_dir)

        # dump current effective configuration
        dump = self.create_artifact("effective", "")  # TODO: not good, since this file does not exist yet
        self.config.set_dump_file(dump)
        self.config.dump()

        # dump merged configuration
        if merged_config:
            merged_config.dump(self.create_artifact("merged", ".yml"), Configuration.YAML)
            merged_config.dump(self.create_artifact("merged", ".json"), Configuration.JSON)

        for artifact in existing_artifacts:
            self.existing_artifact(artifact)

    def is_functional_mode(self):
        return self.aggregator is not None and self.aggregator.is_functional

    def __load_module(self, alias):
        """
        Load module class by alias
        :param alias: str
        :return: class
        """
        if alias in self.modules:
            return self.modules[alias]

        mod_conf = self.config.get('modules')
        if alias not in mod_conf:
            msg = "Module '%s' not found in list of available aliases %s" % (alias, sorted(mod_conf.keys()))
            raise TaurusConfigError(msg)

        settings = ensure_is_dict(mod_conf, alias, "class")

        acopy = copy.deepcopy(settings)
        BetterDict.traverse(acopy, Configuration.masq_sensitive)
        self.log.debug("Module config: %s %s", alias, acopy)

        err = TaurusConfigError("Class name for alias '%s' is not found in module settings: %s" % (alias, settings))
        clsname = settings.get('class', err)

        self.modules[alias] = load_class(clsname)
        if not issubclass(self.modules[alias], EngineModule):
            raise TaurusInternalException("Module class does not inherit from EngineModule: %s" % clsname)

        return self.modules[alias]

    def instantiate_module(self, alias):
        """
        Create new instance for module using its alias from module settings
        section of config. Thus, to instantiate a module, it must be mentioned
        in the settings.

        :type alias: str
        :rtype: EngineModule
        """
        classobj = self.__load_module(alias)
        instance = classobj()
        # assert isinstance(instance, EngineModule)
        instance.log = self.log.getChild(alias)
        instance.engine = self
        settings = self.config.get("modules")
        instance.settings = settings.get(alias)
        return instance

    def find_file(self, filename):
        """
        Try to find a file or dir in search_path if it was specified. Helps find files
        in non-CLI environments or relative to the config path.
        The returned path is absolute and needs no further abspath/etc. treatment.
        :param filename: file basename to find
        :type filename: str
        """
        if not filename:
            return filename

        if filename.lower().startswith("http://") or filename.lower().startswith("https://"):
            parsed_url = parse.urlparse(filename)
            downloader = ExceptionalDownloader(self.get_http_client())
            self.log.info("Downloading %s", filename)
            tmp_f_name, http_msg = downloader.get(filename)
            cd_header = http_msg.get('Content-Disposition', '')
            dest = cd_header.split('filename=')[-1] if cd_header and 'filename=' in cd_header else ''
            if not dest:
                dest = os.path.basename(parsed_url.path)
            fname, ext = os.path.splitext(dest) if dest else (parsed_url.hostname.replace(".", "_"), '.file')
            dest = self.create_artifact(fname, ext)
            self.log.debug("Moving %s to %s", tmp_f_name, dest)
            shutil.move(tmp_f_name, dest)
            return dest
        else:
            filename = os.path.expanduser(filename)     # expanding of '~' is required for check of existence

            # check filename 'as is' and all combinations of file_search_path/filename
            for dirname in [""] + self.file_search_paths:
                location = os.path.join(dirname, filename)
                if os.path.exists(location):
                    if dirname:
                        self.log.warning("Guessed location from search paths for %s: %s", filename, location)
                    return get_full_path(location)

        self.log.warning("Could not find location at path: %s", filename)
        return filename

    def _load_base_configs(self):
        base_configs = [os.path.join(get_full_path(__file__, step_up=1), 'resources', 'base-config.yml')]
        machine_dir = get_configs_dir()  # can't refactor machine_dir out - see setup.py
        if os.path.isdir(machine_dir):
            self.log.debug("Reading extension configs from: %s", machine_dir)
            for cfile in sorted(os.listdir(machine_dir)):
                fname = os.path.join(machine_dir, cfile)
                if os.path.isfile(fname):
                    base_configs.append(fname)
        else:
            self.log.debug("No machine configs dir: %s", machine_dir)

        self.log.debug("Base configs list: %s", base_configs)
        self.config.load(base_configs)

    def _load_user_configs(self, user_configs):
        """
        :type user_configs: list[str]
        :rtype: Configuration
        """
        # "tab-replacement-spaces" is not documented 'cause it loads only from base configs
        # so it's sort of half-working last resort
        self.config.tab_replacement_spaces = self.config.get(SETTINGS).get("tab-replacement-spaces", 4)
        self.log.debug("User configs list: %s", user_configs)
        self.config.load(user_configs)
        user_config = Configuration()
        user_config.log = self.log.getChild(Configuration.__name__)
        user_config.tab_replacement_spaces = self.config.tab_replacement_spaces
        user_config.warn_on_tab_replacement = False
        user_config.load(user_configs, self.__config_loaded)
        return user_config

    def __config_loaded(self, config):
        self.file_search_paths.append(get_full_path(config, step_up=1))

    def __prepare_provisioning(self):
        """
        Instantiate provisioning class
        """
        err = TaurusConfigError("Please check global config availability or configure provisioning settings")
        cls = self.config.get(Provisioning.PROV, err)
        self.provisioning = self.instantiate_module(cls)
        self.prepared.append(self.provisioning)
        self.provisioning.prepare()

    def __prepare_reporters(self):
        """
        Instantiate reporters, then prepare them in case they would like to interact
        """
        reporting = self.config.get(Reporter.REP, [])
        for index, reporter in enumerate(reporting):
            reporter = ensure_is_dict(reporting, index, "module")
            msg = "reporter 'module' field isn't recognized: %s"
            cls = reporter.get('module', TaurusConfigError(msg % reporter))
            instance = self.instantiate_module(cls)
            instance.parameters = reporter
            if self.__singletone_exists(instance, self.reporters):
                continue
            # assert isinstance(instance, Reporter)
            self.reporters.append(instance)

        for reporter in self.reporters[:]:
            if not reporter.should_run():
                self.reporters.remove(reporter)

        # prepare reporters
        for module in self.reporters:
            self.prepared.append(module)
            module.prepare()

    def __prepare_services(self):
        """
        Instantiate service modules, then prepare them
        """
        srv_config = self.config.get(Service.SERV, [])
        services = []
        for index, config in enumerate(srv_config):
            config = ensure_is_dict(srv_config, index, "module")
            cls = config.get('module', '')
            instance = self.instantiate_module(cls)
            instance.parameters = config
            if self.__singletone_exists(instance, services):
                continue
            # assert isinstance(instance, Service)
            services.append(instance)

        for service in services[:]:
            if not service.should_run():
                services.remove(service)

        self.services.extend(services)

        for module in self.services:
            self.prepared.append(module)
            module.prepare()

    def __singletone_exists(self, instance, mods_list):
        """
        :type instance: EngineModule
        :type mods_list: list[EngineModule]
        :rtype: bool
        """
        if not isinstance(instance, Singletone):
            return False

        for mod in mods_list:
            if mod.parameters.get("module") == instance.parameters.get("module"):
                msg = "Module '%s' can be only used once, will merge all new instances into single"
                self.log.warning(msg % mod.parameters.get("module"))
                mod.parameters.merge(instance.parameters)
                return True

    def __prepare_aggregator(self):
        """
        Instantiate aggregators
        :return:
        """
        cls = self.config.get(SETTINGS).get("aggregator", "")
        if not cls:
            self.log.warning("Proceeding without aggregator, no results analysis")
        else:
            self.aggregator = self.instantiate_module(cls)
        self.prepared.append(self.aggregator)
        self.aggregator.prepare()

    def get_http_client(self):
        if self._http_client is None:
            self._http_client = HTTPClient()
            self._http_client.add_proxy_settings(self.config.get("settings").get("proxy"))
        return self._http_client

    def _check_updates(self, install_id):
        try:
            params = (__init__.VERSION, install_id)
            addr = "http://gettaurus.org/updates/?version=%s&installID=%s" % params
            self.log.debug("Requesting updates info: %s", addr)
            client = self.get_http_client()
            response = client.request('GET', addr, timeout=10)

            data = response.json()
            self.log.debug("Taurus updates info: %s", data)
            mine = LooseVersion(__init__.VERSION)
            latest = LooseVersion(data['latest'])
            if mine < latest:
                msg = "There is newer version of Taurus %s available, consider upgrading. " \
                      "What's new: http://gettaurus.org/docs/Changelog/"
                self.log.warning(msg, latest)
            else:
                self.log.debug("Installation is up-to-date")

        except BaseException:
            self.log.debug("Failed to check for updates: %s", traceback.format_exc())
            self.log.warning("Failed to check for updates")

    def eval_env(self):
        """
        Should be done after `configure`
        """
        envs = self.config.get(SETTINGS, force_set=True).get("env", force_set=True)
        envs[TAURUS_ARTIFACTS_DIR] = self.artifacts_dir

        for varname in envs:
            if envs[varname]:
                envs[varname] = str(envs[varname])
                envs[varname] = os.path.expandvars(envs[varname])

        for varname in envs:
            self.env.set({varname: envs[varname]})
            if envs[varname] is None:
                if varname in os.environ:
                    os.environ.pop(varname)
            else:
                os.environ[varname] = str(envs[varname])

        def custom_expandvars(value):
            parts = re.split(r'(\$\{.*?\})', value)
            value = ''
            for item in parts:
                if item and item.startswith("${") and item.endswith("}"):
                    key = item[2:-1]
                    if key in envs:
                        item = envs[key]
                if item is not None:
                    value += text_type(item)
            return value

        def apply_env(value, key, container):
            if isinstance(value, string_types):
                container[key] = custom_expandvars(value)

        BetterDict.traverse(self.config, apply_env)
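
For orientation, a sketch of the lifecycle this Engine expects, roughly mirroring what the Taurus CLI drives: configure, set up the artifacts dir, then prepare, run, and post-process. Error handling is trimmed, and the config filename is made up:

import logging
from bzt.engine import Engine

engine = Engine(logging.getLogger(''))
merged = engine.configure(['my-test.yml'])         # hypothetical user config
engine.create_artifacts_dir(merged_config=merged)
engine.eval_env()                                  # "should be done after configure"
try:
    engine.prepare()
    engine.run()
finally:
    engine.post_process()
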
Code example #20
#!/usr/bin/env python
import rospy
from std_msgs.msg import String
from gazebo_msgs.msg import ModelStates, ModelState
import copy
import numpy as np

from utils import Environment, Drone, assign_targets
import time

offset = np.array([5000., 1000])
env = Environment()
count = 0
period = 1

publisher = rospy.Publisher('/gazebo/set_model_state',
                            ModelState,
                            queue_size=10)


def callback(data):
    global count

    if count == 0:
        env.prev_states = np.array([d.xy.ravel() - offset for d in env.drones])

        _, _, points = assign_targets(*(env.get_data() + [None, 800]))
        if points.size > 0:
            env.step(points)
        env.visualize()
Code example #21
 def __init__(self, path):
     self.config = Configuration.construct(path)
     self.env = Environment(self.config)
     self.memory = ReplayMemory(self.config)
     self.model = Model(self.config)
     self.ep = None