Example 1
    def __init__(
        self, cfg, obs_space, action_space, num_agents, worker_idx, shared_buffers,
        task_queue, policy_queues, report_queue, learner_queues,
    ):
        """
        Ctor.

        :param cfg: global config (all CLI params)
        :param obs_space: observation space (spaces) of the environment
        :param action_space: action space(s)
        :param num_agents: number of agents per env (all env should have the same number of agents right now,
        although it should be easy to fix)
        :param worker_idx: index of this worker process
        :param shared_buffers: shared memory data structures initialized in main process (see shared_buffers.py)
        :param task_queue: queue for incoming messages for THIS particular actor worker. See the task types in the loop
        below, but the most common task is ROLLOUT_STEP, which means "here's your actions, advance simulation by
        one step".
        :param policy_queues: FIFO queues associated with all policies participating in training. We send requests
        for policy queue #N to get actions for envs (agents) that are controlled by policy #N.
        :param report_queue: one-way communication with the main process, various stats and whatnot
        :param learner_queues: one-way communication with the learner, sending trajectory buffers for learning
        """

        self.cfg = cfg
        self.obs_space = obs_space
        self.action_space = action_space
        self.num_agents = num_agents

        self.worker_idx = worker_idx

        self.shared_buffers = shared_buffers

        self.terminate = False

        self.num_complete_rollouts = 0

        self.vector_size = cfg.num_envs_per_worker
        self.num_splits = cfg.worker_num_splits
        assert self.vector_size >= self.num_splits
        assert self.vector_size % self.num_splits == 0, 'Vector size should be divisible by num_splits'

        self.env_runners = None

        self.policy_queues = policy_queues
        self.report_queue = report_queue
        self.learner_queues = learner_queues
        self.task_queue = task_queue

        self.reward_shaping = [None for _ in range(self.cfg.num_policies)]

        self.process = TorchProcess(target=self._run, daemon=True)
        self.process.start()
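
The constructor above hands all of its queues to a background process and returns immediately. As a rough illustration of the task loop the docstring refers to, here is a minimal, hypothetical sketch; the task-type constants and the `_run` body are assumptions, not the source repo's actual implementation:

    import torch.multiprocessing as mp

    TASK_ROLLOUT_STEP, TASK_TERMINATE = 0, 1  # hypothetical task-type constants

    def _run(task_queue, report_queue):
        # Consume tasks until the main process asks this worker to terminate.
        while True:
            task_type, payload = task_queue.get()
            if task_type == TASK_TERMINATE:
                break
            if task_type == TASK_ROLLOUT_STEP:
                # payload carries the latest actions; advance the vectorized
                # envs by one step and report lightweight stats back.
                report_queue.put(('stats', {'rollout_steps': 1}))
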
Example 2
    def __init__(self, worker_idx, policy_id, cfg, obs_space, action_space,
                 shared_buffers, policy_queue, actor_queues, report_queue,
                 task_queue, policy_lock, resume_experience_collection_cv):
        super().__init__()
        log.info('Initializing policy worker %d for policy %d', worker_idx,
                 policy_id)

        self.worker_idx = worker_idx
        self.policy_id = policy_id
        self.cfg = cfg

        self.obs_space = obs_space
        self.action_space = action_space

        self.device = None
        self.actor_critic = None
        self.shared_model_weights = None
        self.policy_lock = policy_lock
        self.resume_experience_collection_cv = resume_experience_collection_cv

        self.policy_queue = policy_queue
        self.actor_queues = actor_queues
        self.report_queue = report_queue

        # queue other components use to talk to this particular worker
        self.task_queue = task_queue

        self.initialized = False
        self.terminate = False
        self.initialized_event = multiprocessing.Event()
        self.initialized_event.clear()

        self.shared_buffers = shared_buffers
        self.tensors_individual_transitions = self.shared_buffers.tensors_individual_transitions
        self.policy_versions = shared_buffers.policy_versions
        self.stop_experience_collection = shared_buffers.stop_experience_collection

        self.latest_policy_version = -1
        self.num_policy_updates = 0

        self.requests = []

        self.total_num_samples = 0

        # Unlike the actor worker above, the process is created here but not
        # started in the constructor.
        self.process = TorchProcess(target=self._run, daemon=True)
Example 3
                               this_rank,
                               test_data,
                               batch_size=test_bsz)

    world_size = len(workers) + 1

    class MyManager(BaseManager):
        pass

    MyManager.register('get_queue')
    MyManager.register('get_param')
    MyManager.register('get_stop_signal')
    manager = MyManager(address=(args.ps_ip, 5000), authkey=b'queue')
    manager.connect()

    q = manager.get_queue()  # Queue receiving the model
    param_q = manager.get_param()  # Queue receiving the initial model
    stop_signal = manager.get_stop_signal()  # Queue receiving the stop signal

    stop_flag = Value(c_bool, False)
    # Define a process monitoring the stop signal
    stop_p = Process(target=capture_stop, args=(stop_signal, stop_flag))

    p = TorchProcess(target=init_processes,
                     args=(this_rank, world_size, model, train_data, test_data,
                           q, param_q, stop_flag, run))
    p.start()
    stop_p.start()
    p.join()
    stop_p.join()
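
The example above shows only the client side of the BaseManager handshake. For context, here is a minimal sketch of a matching server process; the queue-backing callables are an assumption about the parameter server, not code from the same repo:

    from multiprocessing.managers import BaseManager
    from queue import Queue

    model_q = Queue()      # receives model updates from the workers
    param_q = Queue()      # hands out the initial model parameters
    stop_signal = Queue()  # broadcasts the stop signal

    class MyManager(BaseManager):
        pass

    # Register the same names the clients look up, each backed by a callable.
    MyManager.register('get_queue', callable=lambda: model_q)
    MyManager.register('get_param', callable=lambda: param_q)
    MyManager.register('get_stop_signal', callable=lambda: stop_signal)

    if __name__ == '__main__':
        server = MyManager(address=('', 5000), authkey=b'queue').get_server()
        server.serve_forever()
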
Example 4
                                            transform=test_transform)
        else:
            print('Model must be MnistCNN or AlexNet!')
            sys.exit(-1)
        models.append(model)
    train_bsz = args.train_bsz
    train_bsz /= len(workers)
    train_bsz = int(train_bsz)

    train_data = partition_dataset(train_dataset, workers)
    train_data_list = []
    for i in workers:
        train_data_sub = select_dataset(workers, i, train_data, batch_size=train_bsz)
        train_data_list.append(train_data_sub)

    test_bsz = 400
    # Evaluate on the full test set
    test_data = DataLoader(test_dataset, batch_size=test_bsz, shuffle=False)

    iterations_epoch = int(len(train_dataset) / args.train_bsz)

    save_path = str(args.save_path)
    save_path = save_path.rstrip('/')

    p = TorchProcess(target=init_processes, args=(workers,
                                                  models, save_path,
                                                  train_data_list, test_data, iterations_epoch,
                                                  run))
    p.start()
    p.join()
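
Several of the examples here rely on two repo-specific helpers, partition_dataset and select_dataset. A minimal sketch of what such helpers typically look like, modeled on the data-partitioning pattern from the PyTorch distributed tutorial; the exact behavior is an assumption:

    import random
    from torch.utils.data import DataLoader, Subset

    def partition_dataset(dataset, workers, seed=1234):
        # Shuffle indices once, then give each worker one contiguous chunk.
        indices = list(range(len(dataset)))
        random.Random(seed).shuffle(indices)
        chunk = len(indices) // len(workers)
        return {w: Subset(dataset, indices[i * chunk:(i + 1) * chunk])
                for i, w in enumerate(workers)}

    def select_dataset(workers, rank, partitions, batch_size):
        # Wrap this worker's partition in its own DataLoader.
        # (`workers` is kept only to match the call sites above.)
        return DataLoader(partitions[rank], batch_size=batch_size, shuffle=True)
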
Example 5
    train_bsz /= len(workers)
    train_bsz = int(train_bsz)

    train_data = partition_dataset(train_dataset, workers)
    test_data = partition_dataset(test_dataset, workers)

    this_rank = args.this_rank
    train_data = select_dataset(workers,
                                this_rank,
                                train_data,
                                batch_size=train_bsz)
    test_data = select_dataset(workers,
                               this_rank,
                               test_data,
                               batch_size=test_bsz)

    # Initialize the test dataset
    #test_data = DataLoader(test_dataset, batch_size=test_bsz, shuffle=True)

    world_size = len(workers) + 1

    save_path = str(args.save_path)
    save_path = save_path.rstrip('/')

    p = TorchProcess(target=init_processes,
                     args=(this_rank, world_size, workers, model, save_path,
                           train_data, test_data, run))
    p.start()
    p.join()
Example 6
parser = argparse.ArgumentParser()
# Cluster information
parser.add_argument('--ps-ip', type=str, default='127.0.0.1')
parser.add_argument('--ps-port', type=str, default='29000')
parser.add_argument('--this-rank', type=int, default=0)
parser.add_argument('--learners', type=str, default='1-2-3-4')

args = parser.parse_args()
'''
def run(rank, workers):
    pass

'''


def init_processes(rank, size, backend='tcp'):
    os.environ['MASTER_ADDR'] = args.ps_ip
    os.environ['MASTER_PORT'] = args.ps_port
    dist.init_process_group(backend, rank=rank, world_size=size)
    # fn(rank, workers)


if __name__ == '__main__':
    workers = [int(v) for v in str(args.learners).split('-')]
    world_size = len(workers) + 1

    this_rank = args.this_rank

    p = TorchProcess(target=init_processes, args=(this_rank, world_size))
    p.start()
    p.join()
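
The 'tcp' backend in the example above dates these snippets to early PyTorch; it is no longer available in current releases. A minimal runnable equivalent today, assuming single-machine CPU training with the 'gloo' backend ('nccl' is the usual choice for multi-GPU):

    import os
    import torch.distributed as dist
    import torch.multiprocessing as mp

    def init_process(rank, world_size, backend='gloo'):
        os.environ['MASTER_ADDR'] = '127.0.0.1'
        os.environ['MASTER_PORT'] = '29000'
        dist.init_process_group(backend, rank=rank, world_size=world_size)
        # ... training / parameter-server logic would run here ...
        dist.destroy_process_group()

    if __name__ == '__main__':
        world_size = 2
        mp.spawn(init_process, args=(world_size,), nprocs=world_size)
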
Example 7
                   backend='tcp'):
    os.environ['MASTER_ADDR'] = args.ps_ip
    os.environ['MASTER_PORT'] = args.ps_port
    dist.init_process_group(backend, rank=rank, world_size=size)
    fn(rank, model, train_pics, train_bsz)


if __name__ == '__main__':
    # Random seed setup
    manual_seed = random.randint(1, 10000)
    random.seed(manual_seed)
    torch.manual_seed(manual_seed)

    workers = [int(v) for v in str(args.learners).split('-')]

    model = alexnet(num_classes=10)
    train_pics = 50000
    train_bsz = 64

    train_bsz /= len(workers)
    train_bsz = int(train_bsz)

    world_size = len(workers) + 1
    this_rank = args.this_rank

    p = TorchProcess(target=init_processes,
                     args=(this_rank, world_size, model, train_pics, train_bsz,
                           run))
    p.start()
    p.join()
Example 8
    def __init__(self, inputs):
        self.inputs = inputs
        self.process = TorchProcess(target=self.act, daemon=True)
        self.process.start()
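
This is the spawn-in-constructor pattern at its smallest. Because the process is a daemon, it is killed automatically when the parent exits, which is why none of these constructors bother with explicit cleanup. A self-contained sketch of the pattern; the class and method names are illustrative, with plain torch.multiprocessing standing in for the TorchProcess wrapper:

    import torch.multiprocessing as mp

    class Worker:
        def __init__(self, inputs):
            self.inputs = inputs
            # daemon=True: the child is terminated when the parent exits
            self.process = mp.Process(target=self.act, daemon=True)
            self.process.start()

        def act(self):
            for item in self.inputs:
                print('processing', item)

    if __name__ == '__main__':
        w = Worker([1, 2, 3])
        w.process.join()  # the examples above instead keep the parent busy
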
Example 9
    for i in workers:
        # Take this worker's slice of train_data (data parallelism)
        print('Start: {}, End: {}'.format(sp[i][0], sp[i][1]))
        train_data_sub = train_data[sp[i][0]:sp[i][1]].contiguous()
        train_data_list.append(train_data_sub)

    ntokens = len(corpus.dictionary)
    print("--------------------------", ntokens)

    models = []
    for i in range(workers_num + 1):
        model = RNNModel(args.model,
                         ntokens,
                         ninp=10,
                         nhid=10,
                         nlayers=2,
                         dropout=0.2,
                         tie_weights=True)
        models.append(model)

    print(get_parameter_number(model))

    save_path = str(args.save_path)
    save_path = save_path.rstrip('/')

    p = TorchProcess(target=init_processes,
                     args=(workers, models, save_path, train_data_list,
                           test_data, ntokens, train_batch_size, run))
    p.start()
    p.join()
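
The sp ranges referenced at the top of this example are assumed to be precomputed (start, end) slices of the batched training data, one per worker. A hypothetical helper that would produce such ranges:

    def make_split_points(n_batches, workers):
        # Split n_batches contiguous batches into near-equal [start, end)
        # ranges, keyed by worker rank as in the sp[i] lookups above.
        base, rem = divmod(n_batches, len(workers))
        sp, start = {}, 0
        for idx, w in enumerate(workers):
            end = start + base + (1 if idx < rem else 0)
            sp[w] = (start, end)
            start = end
        return sp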