Example #1
def call(*args, **kwargs):
    # Lazily create the VisualDL LogWriter on the first logging call,
    # falling back to an auto-selected log directory when none is set.
    global _writer
    if _writer is None:
        logdir = logger.get_dir()
        if logdir is None:
            logdir = logger.auto_set_dir(action='d')
            logger.warning(
                "[VisualDL] logdir is None, will save VisualDL files to {}\nView the data using: visualdl --logdir=./{} --host={}"
                .format(logdir, logdir, get_ip_address()))
        _writer = LogWriter(logdir=logger.get_dir())
    func = getattr(_writer, func_name)
    func(*args, **kwargs)
    _writer.flush()
Example #2
def call(*args, **kwargs):
    # Same lazy-initialization pattern as Example #1, but writing
    # tensorboard event files through a SummaryWriter.
    global _writer
    if _writer is None:
        logdir = logger.get_dir()
        if logdir is None:
            logdir = logger.auto_set_dir(action='d')
            logger.warning(
                "[tensorboard] logdir is None, will save tensorboard files to {}\nView the data using: tensorboard --logdir=./{} --host={}"
                .format(logdir, logdir, get_ip_address()))
        _writer = SummaryWriter(logdir=logger.get_dir())
    func = getattr(_writer, func_name)
    func(*args, **kwargs)
    _writer.flush()
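Examples #1 and #2 are the same lazy-initialization wrapper, instantiated once for VisualDL and once for tensorboardX: the writer is only created on the first logging call, so a later logger.set_dir() can still decide where the files end up. A minimal, self-contained sketch of that pattern follows; the factory name make_summary_call, the lock, and the fallback directory are assumptions for illustration, not PARL's actual API.

import os
import threading

_writer = None
_writer_lock = threading.Lock()


def make_summary_call(func_name, get_logdir, writer_cls):
    """Hypothetical factory: return a function that forwards to a lazily
    created writer method named func_name (e.g. 'add_scalar')."""

    def call(*args, **kwargs):
        global _writer
        with _writer_lock:  # avoid two threads racing to create the writer
            if _writer is None:
                logdir = get_logdir() or os.path.join('.', 'train_log')  # assumed fallback
                _writer = writer_cls(logdir=logdir)
        getattr(_writer, func_name)(*args, **kwargs)
        _writer.flush()

    return call

With VisualDL this would be wired up as add_scalar = make_summary_call('add_scalar', logger.get_dir, LogWriter), and analogously with the tensorboardX SummaryWriter.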
Example #3
    def __init__(self, config):
        self.config = config
        self.sample_data_queue = queue.Queue(
            maxsize=config['sample_queue_max_size'])

        #=========== Create Agent ==========
        env = IntraBuildingEnv("config.ini")
        self._mansion_attr = env._mansion.attribute
        self._obs_dim = obs_dim(self._mansion_attr)
        self._act_dim = act_dim(self._mansion_attr)

        self.config['obs_shape'] = self._obs_dim
        self.config['act_dim'] = self._act_dim

        model = RLDispatcherModel(self._act_dim)
        algorithm = IMPALA(model, hyperparas=config)
        self.agent = ElevatorAgent(algorithm, config, self.learn_data_provider)

        self.cache_params = self.agent.get_params()
        self.params_lock = threading.Lock()
        self.params_updated = False
        self.cache_params_sent_cnt = 0
        self.total_params_sync = 0

        #========== Learner ==========
        self.lr, self.entropy_coeff = None, None
        self.lr_scheduler = PiecewiseScheduler(config['lr_scheduler'])
        self.entropy_coeff_scheduler = PiecewiseScheduler(
            config['entropy_coeff_scheduler'])

        self.total_loss_stat = WindowStat(100)
        self.pi_loss_stat = WindowStat(100)
        self.vf_loss_stat = WindowStat(100)
        self.entropy_stat = WindowStat(100)
        self.kl_stat = WindowStat(100)
        self.learn_time_stat = TimeStat(100)
        self.start_time = None

        self.learn_thread = threading.Thread(target=self.run_learn)
        self.learn_thread.setDaemon(True)
        self.learn_thread.start()

        #========== Remote Actor ===========
        self.remote_count = 0

        self.batch_buffer = []
        self.remote_metrics_queue = queue.Queue()
        self.sample_total_steps = 0

        self.remote_manager_thread = threading.Thread(
            target=self.run_remote_manager)
        self.remote_manager_thread.setDaemon(True)
        self.remote_manager_thread.start()

        self.csv_logger = CSVLogger(
            os.path.join(logger.get_dir(), 'result.csv'))

        from utils import Summary
        self.summary = Summary('./output')
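Example #3 roots its CSVLogger at logger.get_dir(); the sketch below shows how such a logger is typically fed once per learn step. The directory name and metric values are placeholders, and log_dict is assumed to be the row-writing method of PARL's CSVLogger.

import os
from parl.utils import logger, CSVLogger

logger.set_dir('./train_log/elevator_impala')  # assumed directory for this sketch
csv_logger = CSVLogger(os.path.join(logger.get_dir(), 'result.csv'))

# Placeholder values standing in for the WindowStat averages tracked above.
metrics = {'total_loss': 0.0, 'pi_loss': 0.0, 'vf_loss': 0.0, 'entropy': 0.0}
csv_logger.log_dict(metrics)  # appends one CSV row per call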
Example #4
    def __init__(self):
        self.rpm = ReplayMemory(int(2e6), OBS_DIM, ACT_DIM)

        # Need acquire lock when model learning or predicting
        self.locks = []
        for i in range(args.ensemble_num):
            self.locks.append(threading.Lock())

        models = []
        for i in range(args.ensemble_num):
            models.append(OpenSimModel(OBS_DIM, VEL_DIM, ACT_DIM, model_id=i))

        hyperparas = {
            'gamma': GAMMA,
            'tau': TAU,
            'ensemble_num': args.ensemble_num
        }
        alg = MultiHeadDDPG(models, hyperparas)

        self.agent = OpenSimAgent(alg, OBS_DIM, ACT_DIM, args.ensemble_num)

        self.scalars_manager = ScalarsManager(logger.get_dir())

        # add lock when appending data to rpm or writing scalars to tensorboard
        self.MEMORY_LOCK = threading.Lock()

        self.clients = defaultdict(self.ClientState)

        self.ready_client_queue = queue.Queue()

        self.noiselevel = 0.5
        self.global_step = 0

        # thread to keep training
        t = threading.Thread(target=self.keep_training)
        t.start()
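Example #4 keeps one lock per ensemble head so that learning and predicting on the same model never overlap. Below is a short sketch of how those locks are typically used when querying the ensemble; the predict(obs, model_id=...) signature is an assumption, not the example's actual agent API.

    def ensemble_predict(self, obs):
        # Hypothetical method on the same class: query each ensemble head under
        # its own lock so a concurrent learn() on that head cannot interleave.
        actions = []
        for i in range(args.ensemble_num):
            with self.locks[i]:
                actions.append(self.agent.predict(obs, model_id=i))  # model_id is assumed
        return actions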
Example #5
def save(self, T):
    # Checkpoint the agent under the logger directory, tagged with the episode count T.
    save_path = os.path.join(
        logger.get_dir(), 'model_every_100_episodes/episodes-{}'.format(T))
    self.agent.save(save_path)
Example #6
def save_rpm(self):
    # Persist the replay memory next to the other log files.
    save_path = os.path.join(logger.get_dir(), "rpm.npz")
    self.rpm.save(save_path)
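The counterpart to Example #6 is reloading the snapshot at startup. A hedged sketch, assuming the ReplayMemory in use exposes a load() symmetric to the save() above:

def restore_rpm(self):
    # Reload the replay memory written by save_rpm(); load() is assumed here.
    load_path = os.path.join(logger.get_dir(), "rpm.npz")
    if os.path.exists(load_path):
        self.rpm.load(load_path)
        logger.info('restored replay memory from {}'.format(load_path))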
Example #7
    parser = argparse.ArgumentParser()
    parser.add_argument('--env',
                        help='Mujoco environment name',
                        default='HalfCheetah-v1')
    parser.add_argument('--train_total_steps',
                        type=int,
                        default=int(55e5),
                        help='maximum training steps')
    parser.add_argument(
        '--test_every_steps',
        type=int,
        default=int(1e4),
        help='the step interval between two consecutive evaluations')
    parser.add_argument('--kappa', type=float, default=float(5), help='kappa')
    parser.add_argument('--epoch',
                        type=float,
                        default=float(10000),
                        help='epoch')
    parser.add_argument('--alpha', type=float, default=float(2), help='alpha')
    parser.add_argument('--seed', type=int, default=int(1), help='env seed')

    args = parser.parse_args()

    logger.set_dir('./train_log/{}_k_{}_e_{}_a_{}_s_{}_{}'.format(
        args.env, str(args.kappa), str(args.epoch), str(args.alpha),
        str(args.seed), time.strftime("%H%M%S")))
    csv_logger = CSVLogger(
        os.path.join(logger.get_dir(),
                     'ADER_{}_{}.csv'.format(args.env, str(args.seed))))
    main()
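Example #7 only prepares the log directory and the CSV file; the sketch below shows one way an evaluation result could be appended inside main(). The helper name and metric keys are assumptions.

def log_evaluation(total_steps, eval_reward):
    # Hypothetical helper: record one evaluation point to the console log
    # and to the ADER_*.csv file created above.
    logger.info('steps: {}, eval reward: {:.2f}'.format(total_steps, eval_reward))
    csv_logger.log_dict({'steps': total_steps, 'eval_reward': eval_reward})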
Example #8
def restore(agent):
    learnDir = os.path.join(logger.get_dir(), 'learn_01')
    predictDir = os.path.join(logger.get_dir(), 'predict_01')
    logger.info('restore model from {}'.format(learnDir))
    agent.load_params(learnDir, predictDir)
Example #9
def save(agent):
    learnDir = os.path.join(logger.get_dir(), 'learn_01')
    predictDir = os.path.join(logger.get_dir(), 'predict_01')
    agent.save_params(learnDir, predictDir)
Example #10
def restore(agent):
    print(logger.get_dir())
    # Note: these hard-coded absolute paths bypass logger.get_dir() entirely.
    learnDir = r"D:\GoogleDownloads\zheng_bo_pu-PARL-Sample-master\PARL-Sample\flappy_bird\log_dir\Train_Test_Working_Flow\learn"
    predictDir = r"D:\GoogleDownloads\zheng_bo_pu-PARL-Sample-master\PARL-Sample\flappy_bird\log_dir\Train_Test_Working_Flow\predict"
    print('restore model from {}'.format(learnDir))
    agent.load_params(learnDir, predictDir)
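Examples #8 to #10 pair save_params and load_params around directories that usually come from logger.get_dir(). A short round-trip sketch, assuming the agent is built elsewhere and the log directory is set explicitly:

from parl.utils import logger

logger.set_dir('./log_dir/flappy_bird')  # assumed; makes get_dir() deterministic

# agent = ...  (constructed as in the surrounding examples)
save(agent)     # writes learn_01/ and predict_01/ under logger.get_dir()
restore(agent)  # reads the same directories back (Example #8's version)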