コード例 #1
0
    def create_actors(self):
        """Connect to the cluster and start one sampling thread per remote actor.

        Connects to ``self.config['master_address']``, then, for each of the
        ``self.config['actor_num']`` actors: increments ``self.remote_count``,
        sets ``self.start_time`` if it is still ``None``, appends a fresh
        queue to ``self.predict_output_queues`` and starts a daemon thread
        running ``self.run_remote_sample(ident)``, where ``ident`` is the
        zero-based index of the actor.
        """
        parl.connect(self.config['master_address'])

        logger.info('Waiting for {} remote actors to connect.'.format(
            self.config['actor_num']))

        self.predict_output_queues = []

        # The loop index is the actor identifier (0, 1, 2, ...), so no
        # separate counter is needed.
        for ident in six.moves.range(self.config['actor_num']):
            self.remote_count += 1
            logger.info('Remote simulator count: {}'.format(self.remote_count))
            if self.start_time is None:
                self.start_time = time.time()

            self.predict_output_queues.append(queue.Queue())

            remote_thread = threading.Thread(target=self.run_remote_sample,
                                             args=(ident, ))
            # ``Thread.setDaemon`` is deprecated; set the attribute instead.
            remote_thread.daemon = True
            remote_thread.start()
コード例 #2
0
    def test_sync_config_file(self):
        # Compares the actor's random_sum() / read_config() results with the
        # local fixtures after the local fixture files have been removed.
        master = Master(port=1335)
        master_thread = threading.Thread(target=master.run)
        master_thread.start()
        time.sleep(1)
        worker = Worker('localhost:1335', 1)

        # Fixture 1: a random array saved to disk; its sum is the reference.
        array_path = 'random.npy'
        expected_array = np.random.randn(3, 5)
        np.save(array_path, expected_array)
        expected_sum = expected_array.sum()

        # Fixture 2: a small JSON config file.
        expected_config = {'test': 1000}
        with open('config.json', 'w') as handle:
            json.dump(expected_config, handle)

        parl.connect('localhost:1335', ['random.npy', 'config.json'])
        actor = Actor('random.npy', 'config.json')
        time.sleep(5)
        # Remove the local copies before querying the actor.
        for local_copy in ('./random.npy', './config.json'):
            os.remove(local_copy)
        self.assertEqual(actor.random_sum(), expected_sum)
        time.sleep(10)

        self.assertEqual(expected_config['test'], actor.read_config())

        del actor
        worker.exit()
        master.exit()
コード例 #3
0
 def _connect_and_create_actor(cluster_addr):
     """Connect to ``cluster_addr``, exercise two actors in turn, then disconnect.

     Each actor must return 2 for ``add_one(1)``.
     """
     parl.connect(cluster_addr)
     created = 0
     while created < 2:
         actor = Actor()
         assert actor.add_one(1) == 2
         created += 1
     disconnect()
コード例 #4
0
    def test_max_memory(self):
        # Watches the monitor's actor count for the first client: 1 after an
        # actor is created here, and 0 again after the child process running
        # self.actor has been given up to six 10 s polls.
        port = 3001
        cluster_addr = 'localhost:{}'.format(port)
        master = Master(port=port)
        master_thread = threading.Thread(target=master.run)
        master_thread.start()
        time.sleep(5)
        worker = Worker(cluster_addr, 1)
        cluster_monitor = ClusterMonitor(cluster_addr)
        time.sleep(5)
        parl.connect(cluster_addr)
        actor = Actor()
        time.sleep(20)
        self.assertEqual(1, cluster_monitor.data['clients'][0]['actor_num'])
        del actor
        time.sleep(10)
        child = Process(target=self.actor, args=(cluster_addr, ))
        child.start()

        # Poll until the monitor reports zero actors for the first client.
        for _ in range(6):
            actor_num = cluster_monitor.data['clients'][0]['actor_num']
            if actor_num == 0:
                break
            time.sleep(10)
        if actor_num == 1:
            raise ValueError("Actor max memory test failed.")
        self.assertEqual(0, cluster_monitor.data['clients'][0]['actor_num'])
        child.terminate()

        worker.exit()
        master.exit()
コード例 #5
0
    def test_send_file(self):
        # Starts a master on port 1239 and a one-CPU worker, creates an empty
        # rom_files/pong.bin, connects with that file in `distributed_files`
        # and polls actor.check_local_file() (up to ten times, 10 s apart)
        # before asserting that it returns True.
        port = 1239
        master = Master(port=port)
        th = threading.Thread(target=master.run)
        th.start()
        worker = Worker('localhost:{}'.format(port), 1)
        time.sleep(2)

        tmp_dir = 'rom_files'
        tmp_file = os.path.join(tmp_dir, 'pong.bin')
        # Build the fixture with the standard library instead of shelling out
        # to mkdir / touch / `type NUL`: the same code path on every platform
        # and no shell involved.
        if not os.path.isdir(tmp_dir):
            os.makedirs(tmp_dir)
        # Append mode creates the file when it is missing and leaves any
        # existing content untouched, like `touch`.
        with open(tmp_file, 'a'):
            pass
        assert os.path.exists(tmp_file)
        parl.connect('localhost:{}'.format(port), distributed_files=[tmp_file])
        time.sleep(5)
        actor = Actor()
        for _ in range(10):
            if actor.check_local_file():
                break
            time.sleep(10)
        self.assertEqual(True, actor.check_local_file())
        del actor
        time.sleep(10)
        worker.exit()
        master.exit()
コード例 #6
0
    def test_create_actor_in_multiprocessing(self):
        # Runs self._create_actor in two child processes (skipped on Windows)
        # and then once more in the main process.
        cluster_addr = 'localhost:8240'

        # start the master
        master = Master(port=8240)
        master_thread = threading.Thread(target=master.run)
        master_thread.start()
        time.sleep(1)

        worker1 = Worker(cluster_addr, 4)
        parl.connect(cluster_addr)

        # In windows, fork process cannot access client created in main process.
        if not _IS_WINDOWS:
            children = [
                multiprocessing.Process(target=self._create_actor)
                for _ in range(2)
            ]
            for child in children:
                child.start()
            for child in children:
                child.join()
            print("[test_create_actor_in_multiprocessing]  Join")

        # make sure that the client of the main process still works
        self._create_actor()

        worker1.exit()
        master.exit()
コード例 #7
0
    def test_job_exit_exceptionally(self):
        # Kills every process whose command line matches remote/job.py, then
        # checks that a newly created actor still answers add_one(1) == 2.
        cluster_addr = 'localhost:1334'
        master = Master(port=1334)
        master_thread = threading.Thread(target=master.run)
        master_thread.start()
        time.sleep(1)
        worker1 = Worker(cluster_addr, 4)
        time.sleep(10)
        self.assertEqual(worker1.job_buffer.full(), True)
        time.sleep(1)
        self.assertEqual(master.cpu_num, 4)
        print("We are going to kill all the jobs.")
        if not _IS_WINDOWS:
            kill_cmd = "ps aux | grep remote/job.py | awk '{print $2}' | xargs kill -9"
            # With shell=True a plain string is the command line itself.
            subprocess.call(kill_cmd, shell=True)
        else:
            kill_cmd = r'''for /F "skip=2 tokens=2 delims=," %a in ('wmic process where "commandline like '%remote\\job.py%'" get processid^,status /format:csv') do taskkill /F /T /pid %a'''
            print(os.popen(kill_cmd).read())
        parl.connect(cluster_addr)
        actor = Actor()
        self.assertEqual(actor.add_one(1), 2)
        time.sleep(20)

        master.exit()
        worker1.exit()
コード例 #8
0
 def test_actor_exception_2(self):
     # After a remote method call on one actor raises, a second actor must
     # still answer add_one(1) == 2 and master.cpu_num must be 0.
     logger.info("running: test_actor_exception_2")
     master = Master(port=8236)
     th = threading.Thread(target=master.run)
     th.start()
     time.sleep(3)
     worker1 = Worker('localhost:8236', 1)
     self.assertEqual(1, master.cpu_num)
     parl.connect('localhost:8236')
     actor = Actor()
     try:
         actor.will_raise_exception_func()
     except Exception:
         # The failure of the remote call is expected and deliberately
         # ignored. Catching Exception (not a bare except) keeps Ctrl-C and
         # SystemExit able to stop the test run.
         pass
     actor2 = Actor()
     # Poll up to five times, 10 s apart, for the master to report 0 CPUs.
     for _ in range(5):
         if master.cpu_num == 0:
             break
         time.sleep(10)
     self.assertEqual(actor2.add_one(1), 2)
     self.assertEqual(0, master.cpu_num)
     del actor
     del actor2
     worker1.exit()
     master.exit()
コード例 #9
0
    def test_actor_exception(self):
        # Constructing an actor with an unexpected keyword must raise
        # exceptions.RemoteError; a second, valid actor must still work.
        logger.info("running:test_actor_exception")
        cluster_addr = 'localhost:8235'
        master = Master(port=8235)
        master_thread = threading.Thread(target=master.run)
        master_thread.start()
        time.sleep(3)
        worker1 = Worker(cluster_addr, 1)

        # Up to three checks, 10 s apart, for the master to report one CPU.
        checks_left = 3
        while checks_left and master.cpu_num != 1:
            time.sleep(10)
            checks_left -= 1
        self.assertEqual(1, master.cpu_num)
        logger.info("running:test_actor_exception: 0")
        parl.connect(cluster_addr)
        logger.info("running:test_actor_exception: 1")

        with self.assertRaises(exceptions.RemoteError):
            actor = Actor(abcd='a bug')
        logger.info("running:test_actor_exception: 2")

        actor2 = Actor()
        # Up to three checks, 10 s apart, for the master to report zero CPUs.
        checks_left = 3
        while checks_left and master.cpu_num != 0:
            time.sleep(10)
            checks_left -= 1
        self.assertEqual(actor2.add_one(1), 2)
        self.assertEqual(0, master.cpu_num)

        master.exit()
        worker1.exit()
コード例 #10
0
ファイル: train.py プロジェクト: YuechengLiu/PARL
    def create_actors(self):
        """Connect to the cluster and keep the sampling threads topped up.

        Connects to ``args.cluster_address`` (passing
        ``'official_obs_scaler.npz'`` as a file list), starts
        ``args.actor_num`` daemon threads running ``self.run_remote_sample``,
        waits ten minutes, and then checks every five seconds whether the
        global client reports fewer than ``args.actor_num`` actors, starting
        one more sampling thread each time it does.

        This method never returns: the supervision loop runs forever.
        """
        parl.connect(args.cluster_address, ['official_obs_scaler.npz'])

        def start_sampler():
            # One daemon thread per remote actor. ``Thread.setDaemon`` is
            # deprecated, so the attribute is assigned directly.
            sampler = threading.Thread(target=self.run_remote_sample)
            sampler.daemon = True
            sampler.start()

        for i in range(args.actor_num):
            logger.info('Remote actor count: {}'.format(i + 1))
            start_sampler()

        # There is a memory-leak problem in osim-rl package.
        # So we will dynamically add actors when remote actors killed due to excessive memory usage.
        time.sleep(10 * 60)
        parl_client = get_global_client()
        while True:
            if parl_client.actor_num < args.actor_num:
                logger.info(
                    'Dynamic adding actor, current actor num:{}'.format(
                        parl_client.actor_num))
                start_sampler()
            time.sleep(5)
コード例 #11
0
    def test_connect_and_create_actor_in_multiprocessing_with_connected_in_main_process(
            self):
        # Runs self._connect_and_create_actor in two child processes while the
        # main process is already connected, then creates an actor in the
        # main process.
        cluster_addr = 'localhost:8238'

        # start the master
        master = Master(port=8238)
        master_thread = threading.Thread(target=master.run)
        master_thread.start()
        time.sleep(1)

        worker1 = Worker(cluster_addr, 4)
        parl.connect(cluster_addr)

        children = [
            multiprocessing.Process(target=self._connect_and_create_actor,
                                    args=(cluster_addr, ))
            for _ in range(2)
        ]
        for child in children:
            child.start()
        for child in children:
            child.join()

        # make sure that the client of the main process still works
        self._create_actor()

        worker1.exit()
        master.exit()
コード例 #12
0
ファイル: log_server_test.py プロジェクト: YuechengLiu/PARL
 def _connect_and_create_actor(cluster_addr):
     """Connect to ``cluster_addr`` and collect ``sim_output(1, 4)`` from two actors.

     Actors are created with ``number=0`` and ``number=1``; each output must
     differ from the empty string. Returns the two outputs as a list, in
     creation order.
     """
     parl.connect(cluster_addr)
     collected = []
     for number in (0, 1):
         actor = Actor(number=number)
         text = actor.sim_output(1, 4)
         assert text != ""
         collected.append(text)
     return collected
コード例 #13
0
ファイル: env_utils.py プロジェクト: ShuaibinLi/RL_CARLA
 def __init__(self, env_name, xparl_addr, train_envs_params):
     """Connect to ``xparl_addr`` and build one ``CarlaRemoteEnv`` per params entry.

     Args:
         env_name: forwarded to every ``CarlaRemoteEnv`` as ``env_name``.
         xparl_addr: address passed to ``parl.connect``.
         train_envs_params: indexable collection of per-environment params;
             the first entry's ``'max_time_episode'`` sets the episode
             step limit.
     """
     parl.connect(xparl_addr)

     envs = []
     for params in train_envs_params:
         envs.append(CarlaRemoteEnv(env_name=env_name, params=params))
     self.env_list = envs
     self.env_num = len(envs)

     # Per-environment running totals, all starting at zero.
     self.episode_reward_list = [0 for _ in range(self.env_num)]
     self.episode_steps_list = [0 for _ in range(self.env_num)]

     first_params = train_envs_params[0]
     self._max_episode_steps = first_params['max_time_episode']
     self.total_steps = 0
コード例 #14
0
ファイル: Coach.py プロジェクト: YuechengLiu/PARL
    def _create_remote_actors(self):
        """Connect to the cluster and start one task thread per remote actor.

        For each of the ``self.args.actors_num`` actors a new signal queue is
        appended to ``self.remote_actors_signal_queues`` and a daemon thread
        running ``self._run_remote_tasks(signal_queue)`` is started.
        """
        # connect to xparl cluster to submit jobs
        parl.connect(self.args.master_address)

        for _ in range(self.args.actors_num):
            signal_queue = queue.Queue()
            self.remote_actors_signal_queues.append(signal_queue)

            remote_thread = threading.Thread(target=self._run_remote_tasks,
                                             args=(signal_queue, ))
            # ``Thread.setDaemon`` is deprecated; set the attribute instead.
            remote_thread.daemon = True
            remote_thread.start()
コード例 #15
0
ファイル: train.py プロジェクト: YuechengLiu/PARL
    def create_actors(self):
        """Connect to the cluster and start one sampling thread per remote actor.

        Connects to ``self.config['master_address']``; for each of the
        ``self.config['actor_num']`` actors it increments
        ``self.remote_count``, sets ``self.start_time`` if it is still
        ``None`` and starts a daemon thread running
        ``self.run_remote_sample``.
        """
        parl.connect(self.config['master_address'])

        logger.info('Waiting for {} remote actors to connect.'.format(
            self.config['actor_num']))

        for _ in range(self.config['actor_num']):
            self.remote_count += 1
            logger.info('Remote actor count: {}'.format(self.remote_count))
            if self.start_time is None:
                self.start_time = time.time()

            remote_thread = threading.Thread(target=self.run_remote_sample)
            # ``Thread.setDaemon`` is deprecated; set the attribute instead.
            remote_thread.daemon = True
            remote_thread.start()
コード例 #16
0
 def test_cluster_status(self):
     # The master's status string must go from "0 used / 1 vacant" to
     # "1 used / 0 vacant" once an actor has been created.
     port = 4321
     cluster_addr = 'localhost:{}'.format(port)
     master = Master(port=port)
     master_thread = threading.Thread(target=master.run)
     master_thread.start()
     time.sleep(5)
     worker = Worker(cluster_addr, 1)
     time.sleep(5)
     self.assertEqual(master.cluster_monitor.get_status_info(),
                      'has 0 used cpus, 1 vacant cpus.')
     parl.connect(cluster_addr)
     # Keep the reference so the actor object stays alive during the wait.
     actor = Actor()
     time.sleep(50)
     self.assertEqual(master.cluster_monitor.get_status_info(),
                      'has 1 used cpus, 0 vacant cpus.')
     worker.exit()
     master.exit()
コード例 #17
0
 def test_get_attribute(self):
     # Attributes read through the actor proxy must equal the constructor
     # arguments (an int, a float and a 3x3 array).
     logger.info("running:test_get_attirbute")
     cluster_addr = 'localhost:8507'
     master = Master(port=8507)
     master_thread = threading.Thread(target=master.run)
     master_thread.start()
     time.sleep(3)
     worker1 = Worker(cluster_addr, 1)
     int_arg = np.random.randint(100)
     float_arg = np.random.randn()
     array_arg = np.random.randn(3, 3)
     parl.connect(cluster_addr)
     actor = Actor(int_arg, float_arg, array_arg)
     self.assertTrue(int_arg == actor.arg1)
     self.assertTrue(float_arg == actor.arg2)
     elementwise_equal = array_arg == actor.arg3
     self.assertTrue(elementwise_equal.all())
     master.exit()
     worker1.exit()
コード例 #18
0
    def create_actors(self):
        """Create actors for parallel training.

        Connects to ``self.config['master_address']`` and resets
        ``self.remote_count`` to 0. For each of the
        ``self.config['actor_num']`` actors it creates a signal queue and an
        output queue, appends them to ``self.actors_signal_input_queues`` and
        ``self.actors_output_queues``, increments ``self.remote_count`` and
        starts a daemon thread running
        ``self.run_remote_sample(signal_queue, output_queue)``.
        """

        parl.connect(self.config['master_address'])
        self.remote_count = 0
        for _ in range(self.config['actor_num']):
            signal_queue = queue.Queue()
            output_queue = queue.Queue()
            self.actors_signal_input_queues.append(signal_queue)
            self.actors_output_queues.append(output_queue)

            self.remote_count += 1

            remote_thread = threading.Thread(target=self.run_remote_sample,
                                             args=(signal_queue, output_queue))
            # ``Thread.setDaemon`` is deprecated; set the attribute instead.
            remote_thread.daemon = True
            remote_thread.start()

        logger.info('All remote actors are ready, begin to learn.')
コード例 #19
0
ファイル: train.py プロジェクト: YuechengLiu/PARL
    def create_actors(self):
        """Connect to the cluster and start one sampling thread per remote actor.

        For each of the ``self.config['actor_num']`` actors a parameter queue
        is appended to ``self.params_queues``, ``self.remote_count`` is
        incremented and a daemon thread running
        ``self.run_remote_sample(params_queue)`` is started.
        ``self.start_time`` is set once all threads have been started.
        """
        parl.connect(self.config['master_address'])

        logger.info('Waiting for {} remote actors to connect.'.format(
            self.config['actor_num']))

        for _ in six.moves.range(self.config['actor_num']):
            params_queue = queue.Queue()
            self.params_queues.append(params_queue)

            self.remote_count += 1
            logger.info('Remote actor count: {}'.format(self.remote_count))

            remote_thread = threading.Thread(
                target=self.run_remote_sample, args=(params_queue, ))
            # ``Thread.setDaemon`` is deprecated; set the attribute instead.
            remote_thread.daemon = True
            remote_thread.start()

        logger.info('All remote actors are ready, begin to learn.')
        self.start_time = time.time()
コード例 #20
0
    def test_acor_exit_exceptionally(self):
        # Launches the sibling simulate_client script as a subprocess, waits
        # for master.cpu_num to reach 0, kills the subprocess and then creates
        # an actor from this process.
        port = 1337
        cluster_addr = 'localhost:{}'.format(port)
        master = Master(port)
        master_thread = threading.Thread(target=master.run)
        master_thread.start()
        time.sleep(1)
        worker1 = Worker(cluster_addr, 1)

        client_script = __file__.replace('reset_job_test', 'simulate_client')
        proc = subprocess.Popen([sys.executable, client_script])
        # Up to six checks, 10 s apart.
        for _ in range(6):
            if master.cpu_num == 0:
                break
            time.sleep(10)
        self.assertEqual(master.cpu_num, 0)
        proc.kill()

        parl.connect(cluster_addr)
        actor = Actor()
        master.exit()
        worker1.exit()
        disconnect()
コード例 #21
0
    def test_reset_actor(self):
        # Creates ten actors one after another, drops the last reference and
        # waits for master.cpu_num to return to 4.
        logger.info("running: test_reset_actor")
        cluster_addr = 'localhost:8237'

        # start the master
        master = Master(port=8237)
        master_thread = threading.Thread(target=master.run)
        master_thread.start()
        time.sleep(3)

        worker1 = Worker(cluster_addr, 4)
        parl.connect(cluster_addr)
        for _ in range(10):
            actor = Actor()
            self.assertEqual(actor.add_one(1), 2)
        del actor

        # Up to ten checks, 10 s apart.
        checks_left = 10
        while checks_left and master.cpu_num != 4:
            time.sleep(10)
            checks_left -= 1

        self.assertEqual(master.cpu_num, 4)
        worker1.exit()
        master.exit()
コード例 #22
0
 def actor(cluster_addr):
     """Connect to ``cluster_addr``, create an actor, wait 10 s, then call ``add_500mb``."""
     parl.connect(cluster_addr)
     remote_actor = Actor()
     startup_delay = 10
     time.sleep(startup_delay)
     remote_actor.add_500mb()
コード例 #23
0
ファイル: simulate_client.py プロジェクト: YuechengLiu/PARL
def train():
    """Connect to ``localhost:1337``, call ``add_one(1)`` on a new actor, then sleep.

    The final sleep lasts 100000 seconds.
    """
    parl.connect('localhost:1337')
    remote_actor = Actor()
    remote_actor.add_one(1)
    hold_seconds = 100000
    time.sleep(hold_seconds)
コード例 #24
0
from parl.remote.client import disconnect
from parl.remote.master import Master
from parl.remote.worker import Worker
import time
import threading

# Module-level constant added to every result of ``Actor.add``.
c = 10
port = 3002
if __name__ == '__main__':
    # The master is only started when this file is run as a script.
    master = Master(port=port)
    th = threading.Thread(target=master.run)
    # ``Thread.setDaemon`` is deprecated; set the attribute instead.
    th.daemon = True
    th.start()
# NOTE(review): the statements below sit outside the ``__main__`` guard, so
# they also run whenever this module is imported - confirm this is intended.
time.sleep(5)
cluster_addr = 'localhost:{}'.format(port)
parl.connect(cluster_addr)
worker = Worker(cluster_addr, 1)


@parl.remote_class
class Actor(object):
    """Remote class whose ``add`` also includes the module-level ``c``."""

    def add(self, a, b):
        """Return ``a + b + c`` where ``c`` is the module-level constant."""
        partial = a + b
        return partial + c


# Module-level actor instance, created as soon as this module is executed.
actor = Actor()


class TestRecursive_actor(unittest.TestCase):
    # Test case that calls ``disconnect()`` after every test method.
    def tearDown(self):
        """Call ``disconnect()`` once each test has finished."""
        disconnect()
コード例 #25
0
ファイル: test.py プロジェクト: Heptazhou/liqibot2

@parl.remote_class
class Actor(object):
    """Remote class with a greeting method and a slow ``add``."""

    def hello_world(self):
        """Print a fixed greeting."""
        print("Hello world.")

    def add(self, a, b, f, l):
        """Sleep 2 s, call ``f()``, append ``b`` to ``l`` and return ``a + b``."""
        delay_seconds = 2
        time.sleep(delay_seconds)
        f()
        l.append(b)
        total = a + b
        return total


# Connect to the master node at localhost:8010. This runs at module level,
# i.e. as soon as the file is executed or imported.
parl.connect("localhost:8010")


def thread(i, a, f, l):
    """Print the result of ``a.add(1, i, f, l)``."""
    print(a.add(1, i, f, l))


def f():
    """Print the single letter ``f``."""
    print("f")


def main():

    ts = []
    #l = []