import logging
import os

import torch
import torch.nn.functional as F

from polyaxon_helper import get_outputs_path, send_metrics


def test(model, test_loader, cuda):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():  # inference only; replaces the deprecated volatile=True Variables
        for data, target in test_loader:
            if cuda:
                data, target = data.cuda(), target.cuda()
            output = model(data)
            # sum up batch loss (reduction='sum' replaces the deprecated size_average=False)
            test_loss += F.nll_loss(output, target, reduction='sum').item()
            # get the index of the max log-probability
            pred = output.max(1, keepdim=True)[1]
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    accuracy = correct / len(test_loader.dataset)
    logging.info(
        'Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
            test_loss, correct, len(test_loader.dataset), 100. * accuracy))

    # Persist the trained weights into the Polyaxon outputs directory.
    output_path = get_outputs_path()
    model_path = os.path.join(output_path, "model.dat")
    torch.save(model.state_dict(), model_path)
    # test_loss and accuracy are already Python floats at this point; the
    # original called .item() on them, which raises AttributeError.
    send_metrics(loss=test_loss, accuracy=accuracy)
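# Usage sketch (assumption, not from the source): how test() might be invoked
# from an evaluation entry point. The MNIST pipeline and batch size are
# illustrative placeholders; the model is expected to emit log-probabilities
# (e.g. via log_softmax), since test() scores it with F.nll_loss.
from torch.utils.data import DataLoader
from torchvision import datasets, transforms


def evaluate(model):
    cuda = torch.cuda.is_available()
    test_loader = DataLoader(
        datasets.MNIST('./data', train=False, download=True,
                       transform=transforms.ToTensor()),
        batch_size=1000, shuffle=False)
    if cuda:
        model = model.cuda()
    test(model, test_loader, cuda)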
import importlib

import gym
from tensorboardX import SummaryWriter  # or: from torch.utils.tensorboard import SummaryWriter


def main(args):
    kwargs = {}
    if args.render_env:
        kwargs['render'] = args.render_env
        print('rendering...')

    if args.log_dir == 'polyaxon':
        from polyaxon_helper import get_outputs_path
        args.log_dir = get_outputs_path()

    print(f'Writing to logdir: {args.log_dir}')
    writer = SummaryWriter(log_dir=args.log_dir)
    env = gym.make(args.env, **kwargs)
    env.seed(args.random_seed)

    agent_class = getattr(importlib.import_module(args.agent_module), args.agent)
    # argv is expected at module scope in the original: the unparsed CLI
    # arguments forwarded to the agent.
    agent = agent_class(env.observation_space, env.action_space, argv, writer)
    try:
        train(args, agent, writer, env)
    except KeyboardInterrupt:
        env.close()
        agent.save('Interrupt')
        raise
    agent.save('Final')
    env.close()
import sys

from polyaxon_helper import get_outputs_path, send_metrics

import cartpole_client


def main(argv=None):
    # Avoid the mutable-default pitfall: read sys.argv at call time, not def time.
    argv = list(sys.argv[1:]) if argv is None else list(argv)
    # Forward the Polyaxon outputs directory to the client via its -f flag.
    argv.extend(['-f', get_outputs_path()])
    cartpole_client.main(argv)
    send_metrics(score=cartpole_client.RESULTS[0]['score'])
def prepare_experiment_run(spec_config,
                           experiment_idx,
                           task_type=TaskType.MASTER,
                           task_id=0):
    spec = Specification.read(spec_config)
    cluster, _ = spec.cluster_def
    if (task_type not in cluster or
            not isinstance(cluster[task_type], int) or
            task_id >= cluster[task_type]):
        raise ValueError('task_type, task_id `{}, {}` is not supported by '
                         'the specification file passed.'.format(task_type, task_id))

    env = spec.environment
    if spec.is_local:
        output_dir = spec.project_path
        log_level = LOGGING_LEVEL[spec.settings.logging.level]
    else:
        output_dir = get_outputs_path()
        log_level = get_log_level()

    if not env:
        tf.logging.set_verbosity(tf.logging.INFO)
        configs = {TaskType.MASTER: [RunConfig()]}
        delay_workers_by_global_step = False
    else:
        tf.logging.set_verbosity(log_level)
        configs, _ = _get_run_configs(spec, experiment_idx)
        delay_workers_by_global_step = env.delay_workers_by_global_step

    train_input_fn, train_steps, train_hooks = _get_train(spec.train)
    (eval_input_fn, eval_steps, eval_hooks, eval_delay_secs,
     continuous_eval_throttle_secs) = _get_eval(spec.eval)

    estimator = getters.get_estimator(spec.model,
                                      configs[task_type][task_id],
                                      output_dir=output_dir)

    return Experiment(
        estimator=estimator,
        train_input_fn=train_input_fn,
        eval_input_fn=eval_input_fn,
        train_steps=train_steps,
        eval_steps=eval_steps,
        train_hooks=train_hooks,
        eval_hooks=eval_hooks,
        eval_delay_secs=eval_delay_secs,
        continuous_eval_throttle_secs=continuous_eval_throttle_secs,
        delay_workers_by_global_step=delay_workers_by_global_step,
        export_strategies=spec.settings.export_strategies)
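# Usage sketch (assumption, not from the source): running the returned
# Experiment for the master task. 'polyaxonfile.yml' is a hypothetical spec
# path; train_and_evaluate() is the standard tf.contrib.learn Experiment
# entry point.
def run_master():
    experiment = prepare_experiment_run('polyaxonfile.yml', experiment_idx=0)
    experiment.train_and_evaluate()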
import os

import numpy as np
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, ReduceLROnPlateau

from polyaxon_helper import get_outputs_path


def get_callbacks(model_type):
    # Prepare the model-saving callback: checkpoints go to the Polyaxon
    # outputs directory, keeping only the best model by validation accuracy.
    model_name = 'cifar10_%s_model.{epoch:03d}.h5' % model_type
    filepath = os.path.join(get_outputs_path(), model_name)
    checkpoint = ModelCheckpoint(filepath=filepath,
                                 monitor='val_acc',
                                 verbose=1,
                                 save_best_only=True)

    # Learning-rate adjustment: a fixed schedule (lr_schedule is defined
    # elsewhere in the project) plus decay on validation plateaus.
    lr_scheduler = LearningRateScheduler(lr_schedule)
    lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1),
                                   cooldown=0,
                                   patience=5,
                                   min_lr=0.5e-6)

    return [checkpoint, lr_reducer, lr_scheduler]
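# Usage sketch (assumption, not from the source): wiring the callbacks into
# Keras' fit() with a toy model and random data. The lr_schedule placeholder
# below stands in for the schedule the original project defines elsewhere.
from keras.models import Sequential
from keras.layers import Dense


def lr_schedule(epoch):
    # Placeholder: decay the learning rate by 10x every 80 epochs.
    return 1e-3 * (0.1 ** (epoch // 80))


def demo_fit():
    model = Sequential([Dense(10, activation='softmax', input_shape=(32,))])
    model.compile(optimizer='adam', loss='categorical_crossentropy',
                  metrics=['accuracy'])
    x = np.random.rand(100, 32).astype('float32')
    y = np.eye(10, dtype='float32')[np.random.randint(0, 10, size=100)]
    model.fit(x, y, epochs=2, validation_split=0.2,
              callbacks=get_callbacks(model_type='demo'))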
# __init__ of an RL agent class; importlib, torch, and argparse.ArgumentParser
# are imported at module level in the original project.
def __init__(self, observation_space, action_space, args_for_parse, summary_writer=None):
    self.action_space = action_space
    self.observation_space = observation_space
    self.action_high = action_space.high
    self.action_low = action_space.low
    if any(action_space.high != -action_space.low):
        raise ValueError(
            f'Env action space is not symmetric. high: {action_space.high} '
            f'low: {action_space.low}')

    self.stats = self.get_state_distr_stats(observation_space)

    parser = ArgumentParser(description='PPO')
    parser = self.add_arguments(parser)
    parser.add_argument('--device', help='Enable gpu optimization',
                        type=str, default='cuda')
    parser.add_argument('--sampler', help='policy sampler',
                        default='OrnsteinUhlenbeckSampler', type=str)
    self.args, _ = parser.parse_known_args(args_for_parse)

    if self.args.model_dir == 'polyaxon':
        from polyaxon_helper import get_outputs_path
        self.args.model_dir = get_outputs_path()

    # Resolve the sampler class by name from the policy_samplers package.
    self.policy_sampler = getattr(
        importlib.import_module('policy_samplers.{}'.format(self.args.sampler)),
        self.args.sampler)(args_for_parse)
    self.action_scale = torch.FloatTensor(
        action_space.high.reshape(1, -1)).to(self.args.device)
    print(f'Parsed Agent parameters {self.args}')
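# Usage sketch (assumption, not from the source): constructing such an agent
# for a symmetric continuous-control task. PPOAgent is a hypothetical name for
# the class this __init__ belongs to, and --model-dir is assumed to be one of
# the flags added by its add_arguments().
import gym


def demo_agent():
    env = gym.make('Pendulum-v0')  # action space is Box(-2, 2), i.e. symmetric
    return PPOAgent(env.observation_space, env.action_space,
                    ['--model-dir', '/tmp/ppo', '--device', 'cpu'])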
# logging, gym.wrappers, step_random, and step_policy are module-level
# imports/helpers in the original project.
def train(args, agent, writer, env):
    # Random loop: run args.random_episodes episodes with random actions.
    logging.info('Running random episodes {} times'.format(args.random_episodes))
    for i in range(args.random_episodes):
        ob = env.reset()
        for _ in range(args.max_episode_len):
            ob, reward, done = step_random(env, agent, ob, episode_num=i)
            if done:
                break

    if args.use_monitor:
        if args.monitor_dir == 'polyaxon':
            from polyaxon_helper import get_outputs_path
            args.monitor_dir = get_outputs_path()
        print(f'Using monitor_dir: {args.monitor_dir}')
        print(f'Using Gym monitor to save videos: {args.use_gym_monitor}, '
              f'render: {args.render_env}')
        env = wrappers.Monitor(env, directory=args.monitor_dir, force=True)

    # Policy loop: act from the learned policy and log per-episode stats.
    global_step = 0
    for i in range(args.random_episodes, args.max_episodes):
        ob = env.reset()
        reward_per_ep = 0
        for ep_step in range(args.max_episode_len):
            global_step += 1
            ob, reward, done = step_policy(env, agent, ob, i)
            reward_per_ep += reward
            if done:
                break
        writer.add_scalar('reward', reward_per_ep, global_step=i)
        writer.add_scalar('avg_length', ep_step, global_step=i)
        if i % args.checkpoint_episodes == 0:
            agent.save('checkpoint_{}'.format(i))

    writer.close()
    env.close()
def main(): """Run PPO until the environment throws an exception.""" config = tf.ConfigProto(allow_soft_placement=True) config.gpu_options.allow_growth = True # pylint: disable=E1101 tf.Session(config=config).__enter__() b_logger.configure(get_outputs_path()) env = DummyVecEnv([make_env]) ppo2.learn(policy=policies.CnnPolicy, env=env, nsteps=4096, nminibatches=8, lam=0.95, gamma=0.99, noptepochs=3, log_interval=1, ent_coef=0.01, lr=lambda _: 2e-4, cliprange=lambda _: 0.1, total_timesteps=int(1e7), save_interval=1)
import os

from polyaxon_helper import get_outputs_path


def get_weight_filename():
    # Checkpoints live in the Polyaxon outputs directory.
    return os.path.join(get_outputs_path(), 'checkpoint.pth.tar')
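# Usage sketch (assumption, not from the source): saving and restoring a
# training checkpoint at the path above, in the common PyTorch pattern.
import torch


def save_checkpoint(model, optimizer, epoch):
    torch.save({'epoch': epoch,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict()},
               get_weight_filename())


def load_checkpoint(model, optimizer):
    checkpoint = torch.load(get_weight_filename())
    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    return checkpoint['epoch']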
# Excerpt begins midway through the script's argument definitions; the
# enclosing parser = argparse.ArgumentParser(...) and earlier flags are
# truncated in the source.
parser.add_argument(
    '--batch-norm-decay',
    type=float,
    default=0.997,
    help='Decay for batch norm.')
parser.add_argument(
    '--batch-norm-epsilon',
    type=float,
    default=1e-5,
    help='Epsilon for batch norm.')
args = parser.parse_args()

if args.num_gpus < 0:
    raise ValueError(
        'Invalid GPU count: "--num-gpus" must be 0 or a positive integer.')
if args.num_gpus == 0 and args.variable_strategy == 'GPU':
    raise ValueError('num-gpus=0, CPU must be used as parameter server. '
                     'Set --variable-strategy=CPU.')
if (args.num_layers - 2) % 6 != 0:
    raise ValueError('Invalid --num-layers parameter.')
if args.num_gpus != 0 and args.train_batch_size % args.num_gpus != 0:
    raise ValueError('--train-batch-size must be multiple of --num-gpus.')
if args.num_gpus != 0 and args.eval_batch_size % args.num_gpus != 0:
    raise ValueError('--eval-batch-size must be multiple of --num-gpus.')

data_dir = os.path.join(list(get_data_paths().values())[0], 'cifar-10-data')
# Create the dataset for the project if it does not exist yet.
if not os.path.exists(os.path.join(data_dir, 'train.tfrecords')):
    generate_data(data_dir)

train(job_dir=get_outputs_path(), data_dir=data_dir, **vars(args))