Example #1
    group.add_argument('--data-type',
                       type=str,
                       default='float32',
                       choices=['float32'],
                       help='Choose the data type.')
    group.add_argument('--large-embedding',
                       default=False,
                       action='store_true',
                       help="set small or large embedding size")
    group.add_argument('--use-ipu-model',
                       default=False,
                       action='store_true',
                       help="use IPU model or not.")
    return parser


if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description="CTR Model Training in Tensorflow")
    parser = add_model_arguments(parser)
    parser = add_dataset_arguments(parser)
    parser = add_training_arguments(parser)
    parser = logger.add_arguments(parser)
    args, unknown = parser.parse_known_args()
    args = vars(args)

    logger.print_setting(args)
    setup_logger(logging.INFO, tf_log)

    inference(args)
Example #2
        help="Replicate graph over N workers to increase batch to batch-size*N"
    )
    group.add_argument('--model-path',
                       type=str,
                       default='./dnn_save_path/ckpt_noshuffDIEN3',
                       help='Place to store and restore model')
    group.add_argument('--use-ipu-model',
                       default=False,
                       action='store_true',
                       help="use IPU model or not.")
    group.add_argument('--use-ipu-emb',
                       default=False,
                       action='store_true',
                       help="Use host embeddig or put embedding on ipu.")
    return parser


if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description="CTR Model Training in Tensorflow", add_help=False)
    parser = add_model_arguments(parser)
    parser = add_dataset_arguments(parser)
    parser = add_training_arguments(parser)
    parser = logger.add_arguments(parser)
    args, _ = parser.parse_known_args()
    args = vars(args)
    logger.print_setting(args, is_dien=False, is_training=False)
    setup_logger(logging.DEBUG, tf_log, name='dien_log.txt')

    inference(args)
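
Examples #1 and #2 call a `setup_logger` helper that is not shown in the snippets; from the call sites it appears to take a logging level, a logger object, and an optional file name. The following is a minimal sketch under that assumption (signature and formatting are guesses, not the original implementation):

import logging

def setup_logger(level, logger, name=None):
    # Hypothetical helper: configure `logger` at `level` with a console
    # handler and, if `name` is given, also write records to that file.
    formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
    logger.setLevel(level)
    stream_handler = logging.StreamHandler()
    stream_handler.setFormatter(formatter)
    logger.addHandler(stream_handler)
    if name is not None:
        file_handler = logging.FileHandler(name)
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)
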
Example #3
def evaluate(args):
    start_time = time.time()
    torch.set_default_tensor_type('torch.FloatTensor')

    pthfile = torch.load(args['load_file'],
                         map_location=lambda storage, loc: storage.cpu())

    # Create the output directory
    output_dir = os.path.join(
        os.path.dirname(args['load_file']), args['output_directory'],
        os.path.split(args['env'])[1] + 'evaluation-' +
        datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S.%f"))
    try:
        os.makedirs(output_dir)
    except OSError:
        if not os.path.isdir(output_dir):
            raise
    print('saving to: ' + output_dir + '/')

    start_log_setup = time.time()
    log = {}
    setup_logger('test.log', r'{0}/test.log'.format(output_dir))
    log['test.log'] = logging.getLogger('test.log')
    end_log_setup = time.time()
    print('single evaluate log setup: %d' % (end_log_setup - start_log_setup))

    gpu_id = args['gpu_ids'][-1]

    torch.manual_seed(args['seed'])
    npr.seed(args['seed'] + 1)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args['seed'])

    for k in args.keys():
        log['test.log'].info('{0}: {1}'.format(k, args[k]))

    env = create_env(args['env'], args)
    player = Agent(None, env, args, None)

    # Wrap the environment so that it saves a video
    if args['render_video']:
        player.env = gym.wrappers.Monitor(player.env, output_dir, force=True)

    start_model = time.time()
    AC = importlib.import_module(args['model_name'])
    player.model = AC.ActorCritic(env.observation_space, env.action_space,
                                  args['stack_frames'], args)

    player.gpu_id = gpu_id
    if gpu_id >= 0:
        with torch.cuda.device(gpu_id):
            player.model = player.model.cuda()

    if args['load_best']:
        player.model.load_state_dict(pthfile['best_state_dict'])
    else:
        player.model.load_state_dict(pthfile['state_dict'])
    player.model.eval()

    end_model = time.time()
    print('single evaluate model setup time: %d' % (end_model - start_model))

    # Keep track of returns
    all_episode_returns = []
    for i_episode in range(args['num_episodes']):
        player.state, player.info = player.env.reset()
        player.state = torch.from_numpy(player.state).float()
        if gpu_id >= 0:
            with torch.cuda.device(gpu_id):
                player.state = player.state.cuda()
        player.eps_len = 0
        reward_sum = 0
        episode_step = 0
        while True:
            player.action_test()
            reward_sum += player.reward
            episode_step += 1

            if player.done:
                all_episode_returns.append(reward_sum)
                #num_tests += 1
                #reward_total_sum += reward_sum
                #reward_mean = reward_total_sum / num_tests
                log['test.log'].info(
                    "Episode_length, {0}, reward_sum, {1}".format(
                        player.eps_len, reward_sum))
                break
    end_episodes = time.time()
    print('single evaluate time for %d episodes: %d' %
          (args['num_episodes'], end_episodes - end_model))
    print('single evaluate seconds per episode: %f' %
          ((end_episodes - end_model) / args['num_episodes']))
    all_episode_returns = np.array(all_episode_returns)
    all_episode_successes = np.array(all_episode_returns > 300.,
                                     dtype=np.float32)

    evaluation_statistics = {
        'Mean Return': np.mean(all_episode_returns),
        'Std Return': np.std(all_episode_returns),
        'Min Return': np.min(all_episode_returns),
        'Max Return': np.max(all_episode_returns),
        'Mean Success': np.mean(all_episode_successes),
        'Number Successes': np.sum(all_episode_successes),
        'Number Total': args['num_episodes'],
        'Std Success': np.std(all_episode_successes),
        'Min Success': np.min(all_episode_successes),
        'Max Success': np.max(all_episode_successes),
        'all_episode_returns': all_episode_returns,
        'all_episode_successes': all_episode_successes,
    }

    # Save raw data to a file
    torch.save(
        {
            'all_episode_returns': all_episode_returns,
            'all_episode_successes': all_episode_successes,
        }, os.path.join(output_dir, 'evaluation_statistics.pth'))

    print('Average Episodic Return: \n\tmean: {0}\n\tstd: {1}\n\tmin: {2}\n\tmax: {3}'
          .format(np.mean(all_episode_returns), np.std(all_episode_returns),
                  np.min(all_episode_returns), np.max(all_episode_returns)))
    print('Average Episodic Success: \n\tmean: {0} ({1}/{2})\n\tstd: {3}\n\tmin: {4}\n\tmax: {5}'
          .format(np.mean(all_episode_successes), np.sum(all_episode_successes),
                  args['num_episodes'], np.std(all_episode_successes),
                  np.min(all_episode_successes), np.max(all_episode_successes)))

    # Shut down logging system and close open file handles
    start_log_shutdown = time.time()
    logging.shutdown()

    end_time = time.time()
    print('single evaluate log shutdown: %d' % (end_time - start_log_shutdown))
    print('single evaluate total time for %d episodes: %d' %
          (args['num_episodes'], end_time - start_time))
    print('single evaluate overall seconds per episode: %f' %
          ((end_time - start_time) / args['num_episodes']))
    return evaluation_statistics
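

# The `setup_logger(logger_name, log_file)` helper called in evaluate() and
# test() is not shown in this example. The definition below is a minimal,
# hypothetical sketch of such a helper (a named logger that writes formatted
# records to `log_file`); the original implementation may differ.
def setup_logger(logger_name, log_file, level=logging.INFO):
    logger = logging.getLogger(logger_name)
    formatter = logging.Formatter('%(asctime)s : %(message)s')
    file_handler = logging.FileHandler(log_file, mode='w')
    file_handler.setFormatter(formatter)
    logger.setLevel(level)
    logger.addHandler(file_handler)
    return logger

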
def test(args, shared_model):
    ptitle('Test Agent')
    gpu_id = args.gpu_ids[-1]
    log = {}
    setup_logger(
        '{}_log'.format(args.env),
        r'{0}{1}{2}_log'.format(args.log_dir, args.save_prefix, args.env))
    log['{}_log'.format(args.env)] = logging.getLogger('{}_log'.format(
        args.env))
    d_args = vars(args)
    for k in d_args.keys():
        log['{}_log'.format(args.env)].info('{0}: {1}'.format(k, d_args[k]))

    torch.manual_seed(args.seed)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed)
    env = create_env(args.env, args)
    reward_sum = 0
    start_time = time.time()
    num_tests = 0
    reward_total_sum = 0
    player = Agent(None, None, env, args, None)
    player.gpu_id = gpu_id

    AC = importlib.import_module(args.model_name)
    player.model = AC.ActorCritic(env.observation_space, env.action_space,
                                  args.stack_frames, args)

    player.state, player.info = player.env.reset()
    player.state = torch.from_numpy(player.state).float()
    if gpu_id >= 0:
        with torch.cuda.device(gpu_id):
            player.model = player.model.cuda()
            player.state = player.state.cuda()
    player.model.eval()

    episode_count = 0
    all_scores = []
    max_score = 0
    while True:
        if player.done:
            episode_count += 1
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.model.load_state_dict(shared_model.state_dict())
            else:
                player.model.load_state_dict(shared_model.state_dict())

        player.action_test()
        reward_sum += player.reward

        if player.done:
            num_tests += 1
            reward_total_sum += reward_sum
            reward_mean = reward_total_sum / num_tests
            log['{}_log'.format(args.env)].info(
                "Time {0}, episode reward {1}, episode length {2}, reward mean {3:.4f}"
                .format(
                    time.strftime("%Hh %Mm %Ss",
                                  time.gmtime(time.time() - start_time)),
                    reward_sum, player.eps_len, reward_mean))

            # Plot scores every 5 episodes
            all_scores.append(reward_sum)
            if (episode_count % 5 == 0):
                plt.clf()
                plt.plot(range(len(all_scores)), all_scores)
                plt.title('Test Episode Returns')
                plt.xlabel('Test Episode')
                plt.ylabel('Return')
                plt.savefig('{0}{1}{2}.png'.format(args.log_dir,
                                                   args.save_prefix, args.env))

            if args.save_max and reward_sum >= max_score:
                max_score = reward_sum
                if gpu_id >= 0:
                    with torch.cuda.device(gpu_id):
                        state_to_save = player.model.state_dict()
                        torch.save(
                            state_to_save,
                            '{0}{1}{2}.dat'.format(args.save_model_dir,
                                                   args.save_prefix, args.env))
                else:
                    state_to_save = player.model.state_dict()
                    torch.save(
                        state_to_save,
                        '{0}{1}{2}.dat'.format(args.save_model_dir,
                                               args.save_prefix, args.env))

            reward_sum = 0
            player.eps_len = 0
            state, player.info = player.env.reset()
            time.sleep(60)
            player.state = torch.from_numpy(state).float()
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()