Ejemplo n.º 1
0
    def __init__(self, env, worker_id, gamma, env_render, logger, verbose):
        """Initialise a worker that holds a policy network and a target network.

        Args:
            env: Environment object; passed to ``rl_utils.get_rl_model`` to
                build both networks.
            worker_id: Identifier of this worker, stored as-is.
            gamma: Discount rate.
            env_render: Rendering setting, stored as-is.
            logger: Logger object, stored as-is.
            verbose: Verbosity setting, stored as-is.
        """
        # Keep the constructor arguments on the instance.
        self.env, self.worker_id = env, worker_id
        self.env_render, self.logger, self.verbose = env_render, logger, verbose

        self.gamma = gamma  # discount rate
        self.learning_rate = LEARNING_RATE  # learning rate (module-level constant)
        self.trajectory = []

        # Two separately built networks, both moved to `device`.
        policy_net = rl_utils.get_rl_model(self.env).to(device)
        target_net = rl_utils.get_rl_model(self.env).to(device)

        # The target network starts with the policy network's state dict and
        # is switched to eval mode.
        target_net.load_state_dict(policy_net.state_dict())
        target_net.eval()

        self.policy_model = policy_net
        self.target_model = target_net

        # Only the policy network's parameters are handed to the optimizer.
        self.optimizer = rl_utils.get_optimizer(
            parameters=policy_net.parameters(),
            learning_rate=self.learning_rate,
        )

        # NOTE(review): 10000 is presumably the replay capacity - confirm
        # against ReplayMemory's signature.
        self.memory = ReplayMemory(10000)
        self.steps_done = 0

        # `model` is an alias of the policy network (same object).
        self.model = policy_net
Ejemplo n.º 2
0
    def __init__(self, env, worker_id, gamma, env_render, logger, verbose):
        """Initialise a worker with a single model and its optimizer.

        Args:
            env: Environment object; passed to ``rl_utils.get_rl_model``.
            worker_id: Identifier of this worker; also passed to
                ``rl_utils.get_rl_model``.
            gamma: Discount rate.
            env_render: Rendering setting, stored as-is.
            logger: Logger object, stored as-is.
            verbose: Verbosity setting, stored as-is.
        """
        # Keep the constructor arguments on the instance.
        self.env, self.worker_id = env, worker_id
        self.env_render, self.logger, self.verbose = env_render, logger, verbose

        self.gamma = gamma  # discount rate
        self.learning_rate = LEARNING_RATE  # learning rate (module-level constant)
        self.trajectory = []

        # Build the model for this environment/worker pair, then create an
        # optimizer over its parameters.
        network = rl_utils.get_rl_model(self.env, self.worker_id)
        self.model = network

        self.optimizer = rl_utils.get_optimizer(
            parameters=network.parameters(),
            learning_rate=self.learning_rate,
        )
Ejemplo n.º 3
0
# Export the configured device list through CUDA_VISIBLE_DEVICES. This runs
# at module level, i.e. before any of the CUDA calls below.
os.environ["CUDA_VISIBLE_DEVICES"] = CUDA_VISIBLE_DEVICES_NUMBER_LIST

if __name__ == "__main__":
    # Seed PyTorch's random number generator with the configured SEED.
    torch.manual_seed(SEED)

    if torch.cuda.is_available():
        # Seed the CUDA generator too, and configure cuDNN for repeatable
        # results: benchmark mode off, deterministic mode on.
        torch.cuda.manual_seed(SEED)
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True

    # NOTE(review): judging by their names these helpers create the output
    # directories and prompt about removing existing files - confirm in utils.
    utils.make_output_folders()
    utils.ask_file_removal()

    env = rl_utils.get_environment()
    # NOTE(review): -1 is passed as the second argument to get_rl_model;
    # presumably it marks a model that belongs to no worker - confirm.
    rl_model = rl_utils.get_rl_model(env, -1)

    # This environment/model pair is only handed to the configuration printout
    # in the code visible here.
    utils.print_configuration(env, rl_model)

    try:
        chief = Process(target=utils.run_chief, args=())
        chief.start()

        time.sleep(1.5)

        workers = []
        for worker_id in range(NUM_WORKERS):
            worker = Process(target=utils.run_worker, args=(worker_id, ))
            workers.append(worker)
            worker.start()