def test(model, test_loader, cuda):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():  # Variable(volatile=True) is deprecated; disable autograd for evaluation
        for data, target in test_loader:
            if cuda:
                data, target = data.cuda(), target.cuda()
            output = model(data)
            test_loss += F.nll_loss(
                output, target, reduction='sum').item()  # sum up batch loss
            pred = output.max(
                1, keepdim=True)[1]  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    accuracy = correct / len(test_loader.dataset)
    logging.info(
        'Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
            test_loss, correct, len(test_loader.dataset), 100. * accuracy))
    output_path = get_outputs_path()
    model_path = os.path.join(output_path, "model.dat")
    torch.save(model.state_dict(), model_path)

    send_metrics(loss=test_loss, accuracy=accuracy)
Example #2
def main(args):
    kwargs = {}
    if args.render_env:
        kwargs['render'] = args.render_env
        print('rendering...')

    if args.log_dir == 'polyaxon':
        from polyaxon_helper import get_outputs_path
        args.log_dir = get_outputs_path()
        print(f'Writing to logdir: {args.log_dir}')
    writer = SummaryWriter(log_dir=args.log_dir)

    env = gym.make(args.env, **kwargs)
    env.seed(args.random_seed)

    agent_class = getattr(importlib.import_module(args.agent_module),
                          args.agent)
    agent = agent_class(env.observation_space, env.action_space,
                        sys.argv[1:], writer)  # pass raw CLI args so the agent can parse its own flags

    try:
        train(args, agent, writer, env)
    except KeyboardInterrupt:
        env.close()
        agent.save('Interrupt')
        raise  # re-raise to preserve the original traceback
    agent.save('Final')
    env.close()
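
For reference, a minimal sketch of the argparse setup this main() appears to expect; the flag names follow the attributes read above (args.env, args.agent_module, args.agent, args.log_dir, args.random_seed, args.render_env), while the defaults and the agent module/class names are purely illustrative:

import argparse

def parse_args():
    parser = argparse.ArgumentParser(description='RL training launcher')
    parser.add_argument('--env', default='Pendulum-v0')
    parser.add_argument('--agent-module', default='agents.ppo_agent')  # illustrative module path
    parser.add_argument('--agent', default='PPOAgent')                 # illustrative class name
    parser.add_argument('--log-dir', default='polyaxon')
    parser.add_argument('--random-seed', type=int, default=0)
    parser.add_argument('--render-env', action='store_true')
    # options consumed later by train() (episode counts, monitor settings, ...) are omitted here
    args, _ = parser.parse_known_args()
    return args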
Example #3
def main(argv=sys.argv[1:]):

    argv.extend(['-f', get_outputs_path()])

    cartpole_client.main(argv)

    send_metrics(score=cartpole_client.RESULTS[0]['score'])
Example #4
def prepare_experiment_run(spec_config,
                           experiment_idx,
                           task_type=TaskType.MASTER,
                           task_id=0):
    spec = Specification.read(spec_config)
    cluster, _ = spec.cluster_def

    if (task_type not in cluster or not isinstance(cluster[task_type], int)
            or task_id >= cluster[task_type]):
        raise ValueError('task_type, task_id `{}, {}` is not supported by '
                         'the specification file passed.'.format(
                             task_type, task_id))

    env = spec.environment

    if spec.is_local:
        output_dir = spec.project_path
        log_level = LOGGING_LEVEL[spec.settings.logging.level]
    else:
        output_dir = get_outputs_path()
        log_level = get_log_level()

    if not env:
        tf.logging.set_verbosity(tf.logging.INFO)
        configs = {TaskType.MASTER: [RunConfig()]}
        delay_workers_by_global_step = False
    else:
        tf.logging.set_verbosity(log_level)
        configs, _ = _get_run_configs(spec, experiment_idx)
        delay_workers_by_global_step = env.delay_workers_by_global_step

    train_input_fn, train_steps, train_hooks = _get_train(spec.train)
    (eval_input_fn, eval_steps, eval_hooks, eval_delay_secs,
     continuous_eval_throttle_secs) = _get_eval(spec.eval)

    estimator = getters.get_estimator(spec.model,
                                      configs[task_type][task_id],
                                      output_dir=output_dir)

    return Experiment(
        estimator=estimator,
        train_input_fn=train_input_fn,
        eval_input_fn=eval_input_fn,
        train_steps=train_steps,
        eval_steps=eval_steps,
        train_hooks=train_hooks,
        eval_hooks=eval_hooks,
        eval_delay_secs=eval_delay_secs,
        continuous_eval_throttle_secs=continuous_eval_throttle_secs,
        delay_workers_by_global_step=delay_workers_by_global_step,
        export_strategies=spec.settings.export_strategies)
Example #5
def get_callbacks(model_type):
    # Prepare callbacks for model saving
    # Prepare the model saving directory
    model_name = 'cifar10_%s_model.{epoch:03d}.h5' % model_type
    filepath = os.path.join(get_outputs_path(), model_name)
    checkpoint = ModelCheckpoint(filepath=filepath,
                                 monitor='val_acc',
                                 verbose=1,
                                 save_best_only=True)

    # Learning rate adjustment
    lr_scheduler = LearningRateScheduler(lr_schedule)

    lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1),
                                   cooldown=0,
                                   patience=5,
                                   min_lr=0.5e-6)

    return [checkpoint, lr_reducer, lr_scheduler]
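
A brief usage sketch for these callbacks; model, x_train, y_train, x_test and y_test are placeholders that the original snippet does not define:

callbacks = get_callbacks(model_type='resnet')
model.fit(x_train, y_train,
          batch_size=32,
          epochs=200,
          validation_data=(x_test, y_test),
          shuffle=True,
          callbacks=callbacks)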
Example #6
    def __init__(self,
                 observation_space,
                 action_space,
                 args_for_parse,
                 summary_writer=None):

        self.action_space = action_space
        self.observation_space = observation_space

        self.action_high = action_space.high
        self.action_low = action_space.low

        if any(action_space.high != -action_space.low):
            raise ValueError(
                f"Env action space is not symmetric. high :{action_space.high} low: {action_space.low}"
            )

        self.stats = self.get_state_distr_stats(observation_space)
        parser = ArgumentParser(description='PPO')
        parser = self.add_arguments(parser)
        parser.add_argument('--device',
                            help='Enable gpu optimization',
                            type=str,
                            default='cuda')
        parser.add_argument('--sampler',
                            help='policy sampler',
                            default='OrnsteinUhlenbeckSampler',
                            type=str)
        self.args, _ = parser.parse_known_args(args_for_parse)

        if self.args.model_dir == 'polyaxon':
            from polyaxon_helper import get_outputs_path
            self.args.model_dir = get_outputs_path()

        self.policy_sampler = getattr(
            importlib.import_module('policy_samplers.{}'.format(
                self.args.sampler)), self.args.sampler)(args_for_parse)

        self.action_scale = torch.FloatTensor(action_space.high.reshape(
            1, -1)).to(self.args.device)

        print(f'Parsed Agent parameters {self.args}')
Example #7
def train(args, agent, writer, env):
    # random loop
    logging.info('Running random episodes {} times'.format(
        args.random_episodes))
    for i in range(args.random_episodes):
        ob = env.reset()
        for _ in range(args.max_episode_len):
            ob, reward, done = step_random(env, agent, ob, episode_num=i)
            if done:
                break

    if args.use_monitor:
        if args.monitor_dir == 'polyaxon':
            from polyaxon_helper import get_outputs_path
            args.monitor_dir = get_outputs_path()
            print(f'Using monitor_dir: {args.monitor_dir}')
        print(
            f"Using Gym monitor to save videos: {args.use_gym_monitor}, "
            f"render_env: {args.render_env}"
        )
        env = wrappers.Monitor(env, directory=args.monitor_dir, force=True)

    # policy loop
    global_step = 0
    for i in range(args.random_episodes, args.max_episodes):
        ob = env.reset()
        reward_per_ep = 0
        for ep_step in range(args.max_episode_len):
            global_step += 1
            ob, reward, done = step_policy(env, agent, ob, i)
            reward_per_ep += reward
            if done:
                break
        writer.add_scalar("reward", reward_per_ep, global_step=i)
        writer.add_scalar("avg_length", ep_step, global_step=i)

        if i % args.checkpoint_episodes == 0:
            agent.save('checkpoint_{}'.format(i))

    writer.close()
    env.close()
Example #8
def main():
    """Run PPO until the environment throws an exception."""
    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True  # pylint: disable=E1101
    tf.Session(config=config).__enter__()

    b_logger.configure(get_outputs_path())

    env = DummyVecEnv([make_env])
    ppo2.learn(policy=policies.CnnPolicy,
               env=env,
               nsteps=4096,
               nminibatches=8,
               lam=0.95,
               gamma=0.99,
               noptepochs=3,
               log_interval=1,
               ent_coef=0.01,
               lr=lambda _: 2e-4,
               cliprange=lambda _: 0.1,
               total_timesteps=int(1e7),
               save_interval=1)
Example #9
def get_weight_filename():
    return '{}/{}'.format(get_outputs_path(), 'checkpoint.pth.tar')
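
One possible way to use the returned filename for checkpointing; model and optimizer are placeholders, not part of the original one-liner:

checkpoint_file = get_weight_filename()

# write a checkpoint into the Polyaxon outputs directory
torch.save({'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict()}, checkpoint_file)

# restore it later
checkpoint = torch.load(checkpoint_file)
model.load_state_dict(checkpoint['state_dict'])
optimizer.load_state_dict(checkpoint['optimizer'])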
Example #10
    parser.add_argument(
        '--batch-norm-decay',
        type=float,
        default=0.997,
        help='Decay for batch norm.')
    parser.add_argument(
        '--batch-norm-epsilon',
        type=float,
        default=1e-5,
        help='Epsilon for batch norm.')
    args = parser.parse_args()

    if args.num_gpus < 0:
        raise ValueError(
            'Invalid GPU count: \"--num-gpus\" must be 0 or a positive integer.')
    if args.num_gpus == 0 and args.variable_strategy == 'GPU':
        raise ValueError('num-gpus=0, CPU must be used as parameter server. Set '
                         '--variable-strategy=CPU.')
    if (args.num_layers - 2) % 6 != 0:
        raise ValueError('Invalid --num-layers parameter.')
    if args.num_gpus != 0 and args.train_batch_size % args.num_gpus != 0:
        raise ValueError('--train-batch-size must be multiple of --num-gpus.')
    if args.num_gpus != 0 and args.eval_batch_size % args.num_gpus != 0:
        raise ValueError('--eval-batch-size must be multiple of --num-gpus.')

    data_dir = os.path.join(list(get_data_paths().values())[0], 'cifar-10-data')
    # We create data for the project if it does not exist
    if not os.path.exists(os.path.join(data_dir, 'train.tfrecords')):
        generate_data(data_dir)

    train(job_dir=get_outputs_path(), data_dir=data_dir, **vars(args))