def main(config):
    if config.gpu_growth:
        for gpu in tf.config.experimental.list_physical_devices("GPU"):
            tf.config.experimental.set_memory_growth(gpu, True)
    assert config.precision in (16, 32), config.precision
    if config.precision == 16:
        prec.set_policy(prec.Policy("mixed_float16"))
    config.steps = int(config.steps)
    config.logdir.mkdir(parents=True, exist_ok=True)
    print("Logdir", config.logdir)

    # Create environments.
    datadir = config.logdir / "episodes"
    writer = tf.summary.create_file_writer(
        str(config.logdir), max_queue=1000, flush_millis=20000
    )
    writer.set_as_default()
    train_envs = [
        wrappers.Async(
            lambda: make_env(config, writer, "train", datadir, store=True),
            config.parallel,
        )
        for _ in range(config.envs)
    ]
    test_envs = [
        wrappers.Async(
            lambda: make_env(config, writer, "test", datadir, store=False),
            config.parallel,
        )
        for _ in range(config.envs)
    ]
    actspace = train_envs[0].action_space

    # Prefill dataset with random episodes.
    step = count_steps(datadir, config)
    prefill = max(0, config.prefill - step)
    print(f"Prefill dataset with {prefill} steps.")
    random_agent = lambda o, d, _: ([actspace.sample() for _ in d], None)
    tools.simulate(random_agent, train_envs, prefill / config.action_repeat)
    writer.flush()

    # Train and regularly evaluate the agent.
    step = count_steps(datadir, config)
    print(f"Simulating agent for {config.steps-step} steps.")
    agent = Dreamer(config, datadir, actspace, writer)
    if (config.logdir / "variables.pkl").exists():
        print("Load checkpoint.")
        agent.load(config.logdir / "variables.pkl")
    state = None
    while step < config.steps:
        print("Start evaluation.")
        tools.simulate(
            functools.partial(agent, training=False), test_envs, episodes=1
        )
        writer.flush()
        print("Start collection.")
        steps = config.eval_every // config.action_repeat
        state = tools.simulate(agent, train_envs, steps, state=state)
        step = count_steps(datadir, config)
        agent.save(config.logdir / "variables.pkl")
    for env in train_envs + test_envs:
        env.close()
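
# The entry points in this file call a `count_steps` helper defined elsewhere in
# the module. The function below is an illustrative stand-in only (its name and
# the assumption that episodes are stored as .npz files whose 'reward' array has
# one entry per stored step, scaled by the action repeat, are not confirmed by
# this file).
import pathlib

import numpy as np


def count_steps_from_npz(datadir, config):
    # Sum stored transitions over all episode files (the first entry is assumed
    # to belong to the initial observation, hence the -1).
    episodes = list(pathlib.Path(datadir).glob("*.npz"))
    steps = sum(len(np.load(str(f))["reward"]) - 1 for f in episodes)
    # Stored steps are assumed to be environment steps divided by the action
    # repeat, so scale back up to environment steps.
    return steps * config.action_repeat
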
def main(config):
  print(config)
  os.environ['CUDA_VISIBLE_DEVICES'] = str(config.gpu_id)
  if config.gpu_growth:
    # for gpu in tf.config.experimental.list_physical_devices('GPU'):
    #   tf.config.experimental.set_memory_growth(gpu, True)
    print(tf.config.experimental.list_physical_devices('GPU'))
    tf.config.experimental.set_memory_growth(
        tf.config.experimental.list_physical_devices('GPU')[0], True)
  assert config.precision in (16, 32), config.precision
  if config.precision == 16:
    prec.set_policy(prec.Policy('mixed_float16'))
  config.steps = int(config.steps)
  config.logdir.mkdir(parents=True, exist_ok=True)
  print('Logdir', config.logdir)

  # Create environments.
  datadir = config.logdir / 'episodes'
  writer = tf.summary.create_file_writer(
      str(config.logdir), max_queue=1000, flush_millis=20000)
  writer.set_as_default()
  train_envs = [
      wrappers.Async(
          lambda: make_env(config, writer, 'train', datadir, store=True),
          config.parallel)
      for _ in range(config.envs)
  ]
  test_envs = [
      wrappers.Async(
          lambda: make_env(config, writer, 'test', datadir, store=False),
          config.parallel)
      for _ in range(config.envs)
  ]
  actspace = train_envs[0].action_space

  # Prefill dataset with random episodes.
  step = count_steps(datadir, config)
  prefill = max(0, config.prefill - step)
  print(f'Prefill dataset with {prefill} steps.')
  random_agent = lambda o, d, _: ([actspace.sample() for _ in d], None)
  tools.simulate(random_agent, train_envs, prefill / config.action_repeat)
  writer.flush()

  # Train and regularly evaluate the agent.
  step = count_steps(datadir, config)
  print(f'Simulating agent for {config.steps-step} steps.')
  agent = Dreamer(config, datadir, actspace, writer)
  if (config.logdir / 'variables.pkl').exists():
    print('Load checkpoint.')
    agent.load(config.logdir / 'variables.pkl')
  state = None
  last_time = time.time()
  while step < config.steps:
    print('Time since last iteration:', time.time() - last_time)
    last_time = time.time()
    print('Start evaluation.')
    tools.simulate(
        functools.partial(agent, training=False), test_envs, episodes=1)
    writer.flush()
    print('Start collection.')
    steps = config.eval_every // config.action_repeat
    state = tools.simulate(agent, train_envs, steps, state=state)
    step = count_steps(datadir, config)
    agent.save(config.logdir / 'variables.pkl')
  for env in train_envs + test_envs:
    env.close()
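
# The variant above pins a single GPU by exporting CUDA_VISIBLE_DEVICES before
# TensorFlow initializes the device, then enables memory growth on the one
# visible device. A small helper capturing the same idea; the name `select_gpu`
# is hypothetical and not part of the original script.
import os

import tensorflow as tf


def select_gpu(gpu_id, growth=True):
  # Must run before TensorFlow touches the GPU; after initialization the
  # visible-device list can no longer be changed.
  os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu_id)
  gpus = tf.config.experimental.list_physical_devices('GPU')
  if growth and gpus:
    # Only one device is visible after masking, so index 0 is safe here.
    tf.config.experimental.set_memory_growth(gpus[0], True)
  return gpus
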
def main(config):
  if config.gpu_growth:
    for gpu in tf.config.experimental.list_physical_devices('GPU'):
      tf.config.experimental.set_memory_growth(gpu, True)
  assert config.precision in (16, 32), config.precision
  if config.precision == 16:
    prec.set_policy(prec.Policy('mixed_float16'))
  config.steps = int(config.steps)
  config.logdir.mkdir(parents=True, exist_ok=True)
  print('Logdir', config.logdir)

  # Create environments.
  datadir = config.logdir / 'episodes'
  writer = tf.summary.create_file_writer(
      str(config.logdir), max_queue=1000, flush_millis=20000)
  writer.set_as_default()
  train_sim_envs = [
      wrappers.Async(
          lambda: make_env(
              config, writer, 'sim_train', datadir, store=True,
              real_world=False),
          config.parallel)
      for _ in range(config.envs)
  ]
  if config.real_world_prob > 0:
    train_real_envs = [
        wrappers.Async(
            lambda: make_env(
                config, writer, 'real_train', datadir, store=True,
                real_world=True),
            config.parallel)
        for _ in range(config.envs)
    ]
  else:
    train_real_envs = None
  test_envs = [
      wrappers.Async(
          lambda: make_env(
              config, writer, 'test', datadir, store=False, real_world=True),
          config.parallel)
      for _ in range(config.envs)
  ]
  actspace = train_sim_envs[0].action_space

  # Prefill dataset with random episodes.
  step = count_steps(datadir, config)
  prefill = max(0, config.prefill - step)
  print(f'Prefill dataset with {prefill} steps.')
  random_agent = lambda o, d, _: ([actspace.sample() for _ in d], None)
  tools.simulate(random_agent, train_sim_envs, prefill / config.action_repeat)
  writer.flush()
  train_real_step_target = config.sample_real_every * config.time_limit

  # Train and regularly evaluate the agent.
  step = count_steps(datadir, config)
  print(f'Simulating agent for {config.steps-step} steps.')
  agent = Dreamer(config, datadir, actspace, writer)
  if (config.logdir / 'variables.pkl').exists():
    print('Load checkpoint.')
    agent.load(config.logdir / 'variables.pkl')
  else:
    print('Checkpoint not loaded.')
    print(config.logdir / 'variables.pkl')
    print((config.logdir / 'variables.pkl').exists())
  state = None
  while step < config.steps:
    print('Start evaluation.')
    tools.simulate(
        functools.partial(agent, training=False), test_envs, episodes=1)
    writer.flush()
    steps = config.eval_every // config.action_repeat
    print('Start collection from simulator.')
    state = tools.simulate(agent, train_sim_envs, steps, state=state)
    if step >= train_real_step_target and train_real_envs is not None:
      print('Start collection from the real world.')
      state = tools.simulate(agent, train_real_envs, episodes=1, state=state)
      train_real_step_target += config.sample_real_every * config.time_limit
    old_step = step
    step = count_steps(datadir, config)
    agent.save(config.logdir / 'variables.pkl')
  for env in train_sim_envs + test_envs:
    env.close()
  if train_real_envs is not None:
    for env in train_real_envs:
      env.close()
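
# In the sim-to-real variant above, real-world episodes are interleaved on a
# fixed schedule: once the step counter passes the current target, one real
# episode is collected and the target advances by
# `sample_real_every * time_limit` environment steps. A compact restatement of
# that trigger logic; the function name is hypothetical and not part of the
# original file.
def should_collect_real(step, target, sample_real_every, time_limit):
  # Returns (collect_now, next_target), mirroring the check and the target
  # update performed inside the training loop above.
  if step >= target:
    return True, target + sample_real_every * time_limit
  return False, target
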
def main(config):
  print(config)

  # Set random seeds.
  os.environ['PYTHONHASHSEED'] = str(config.seed)
  os.environ['TF_CUDNN_DETERMINISTIC'] = '1'
  random.seed(config.seed)
  np.random.seed(config.seed)
  tf.random.set_seed(config.seed)

  if config.gpu_growth:
    for gpu in tf.config.experimental.list_physical_devices('GPU'):
      tf.config.experimental.set_memory_growth(gpu, True)
  config.logdir = config.logdir / config.task
  config.logdir = config.logdir / 'seed_{}'.format(config.seed)
  config.logdir.mkdir(parents=True, exist_ok=True)
  datadir = config.datadir
  tf_dir = config.logdir / 'tensorboard'
  writer = tf.summary.create_file_writer(
      str(tf_dir), max_queue=1000, flush_millis=20000)
  writer.set_as_default()

  # Create environments.
  train_envs = [
      wrappers.Async(
          lambda: make_env(config, writer, 'train', '.', store=False),
          config.parallel)
      for _ in range(config.envs)
  ]
  test_envs = [
      wrappers.Async(
          lambda: make_env(config, writer, 'test', '.', store=False),
          config.parallel)
      for _ in range(config.envs)
  ]
  actspace = train_envs[0].action_space

  # Train and regularly evaluate the agent.
  agent = Lompo(config, datadir, actspace, writer)
  if agent._c.load_model:
    agent.load_model(config.logdir / 'final_model')
    print('Load pretrained model')
    if agent._c.load_buffer:
      agent.latent_buffer.load(agent._c.logdir / 'buffer.h5py')
    else:
      agent._process_data_to_latent()
      agent.latent_buffer.save(agent._c.logdir / 'buffer.h5py')
  else:
    agent.fit_model(agent._c.model_train_steps)
    # agent.save_model(config.logdir)
    # agent._generate_real_data(steps=5000)
    agent._process_data_to_latent()
    agent.latent_buffer.save(agent._c.logdir / 'buffer.h5py')

  if agent._c.load_agent:
    agent.load_agent(config.logdir)
    print('Load pretrained actor')

  while agent.latent_buffer._latent_stored_steps < agent._c.start_training:
    agent._generate_latent_data(next(agent._dataset))

  while agent._agent_step < int(config.agent_train_steps):
    print('Start evaluation.')
    tools.simulate(
        functools.partial(agent, training=False), test_envs, episodes=1)
    # agent._latent_evaluate(train_envs[0])
    writer.flush()
    print('Start collection.')
    agent.train_agent(agent._c.agent_itters_per_step)
    # agent._generate_real_data(steps=5)
    if config.sample:
      agent._add_data(num_episodes=1)
    else:
      agent._process_data_to_latent(num_episodes=1)

  for env in train_envs + test_envs:
    env.close()
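
# The seeding block at the top of the variant above fixes the Python, NumPy,
# and TensorFlow RNGs and requests deterministic cuDNN kernels. The same setup
# as a standalone helper; the name `set_global_seeds` is hypothetical, the
# original keeps these lines inline.
import os
import random

import numpy as np
import tensorflow as tf


def set_global_seeds(seed):
  # PYTHONHASHSEED only controls hash randomization if it is set before the
  # interpreter starts; setting it here simply mirrors the original script.
  os.environ['PYTHONHASHSEED'] = str(seed)
  os.environ['TF_CUDNN_DETERMINISTIC'] = '1'
  random.seed(seed)
  np.random.seed(seed)
  tf.random.set_seed(seed)
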
def main(config):
  if config.gpu_growth:
    for gpu in tf.config.experimental.list_physical_devices('GPU'):
      tf.config.experimental.set_memory_growth(gpu, True)
  assert config.precision in (16, 32), config.precision
  if config.precision == 16:
    prec.set_policy(prec.Policy('mixed_float16'))
  config.steps = int(config.steps)
  config.logdir.mkdir(parents=True, exist_ok=True)
  print('Logdir', config.logdir)
  arg_dict = vars(config).copy()
  del arg_dict['logdir']
  import json
  with open(os.path.join(config.logdir, 'args.json'), 'w') as fout:
    json.dump(arg_dict, fout)

  # Create environments.
  datadir = config.logdir / 'episodes'
  writer = tf.summary.create_file_writer(
      str(config.logdir), max_queue=1000, flush_millis=20000)
  writer.set_as_default()
  train_envs = [
      wrappers.Async(
          lambda: make_env(config, writer, 'train', datadir, train=True),
          config.parallel)
      for _ in range(config.envs)
  ]
  test_envs = [
      wrappers.Async(
          lambda: make_env(config, writer, 'test', datadir, train=False),
          config.parallel)
      for _ in range(config.envs)
  ]
  actspace = train_envs[0].action_space

  # Prefill dataset with random episodes.
  step = count_steps(datadir, config)
  prefill = max(0, config.prefill - step)
  print(f'Prefill dataset with {prefill} steps.')

  def random_agent(o, d, _):
    return ([actspace.sample() for _ in d], None)

  tools.simulate(random_agent, train_envs, prefill / config.action_repeat)
  writer.flush()

  # Train and regularly evaluate the agent.
  step = count_steps(datadir, config)
  print(f'Simulating agent for {config.steps-step} steps.')
  agent = CVRL(config, datadir, actspace, writer)
  if (config.logdir / 'variables.pkl').exists():
    print('Load checkpoint.')
    agent.load(config.logdir / 'variables.pkl')
  state = None
  while step < config.steps:
    print('Start evaluation.')
    tools.simulate(
        functools.partial(agent, training=False), test_envs, episodes=1)
    writer.flush()
    print('Start collection.')
    steps = config.eval_every // config.action_repeat
    state = tools.simulate(agent, train_envs, steps, state=state)
    step = count_steps(datadir, config)
    agent.save(config.logdir / 'variables.pkl')
  for env in train_envs + test_envs:
    env.close()
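
# The args.json dump in the CVRL variant above assumes every remaining config
# value is JSON serializable once `logdir` is dropped. If the config also
# carries pathlib.Path or NumPy values, a `default=str` fallback keeps the dump
# from raising. This is a defensive sketch with a hypothetical name, not what
# the original script does.
import json


def dump_config(config, path):
  arg_dict = vars(config).copy()
  arg_dict.pop('logdir', None)
  with open(path, 'w') as fout:
    # Fall back to str() for anything json cannot encode natively.
    json.dump(arg_dict, fout, default=str, indent=2)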