    # NOTE(review): this chunk is the tail of an argument-builder function whose
    # `def` line is above this view; `group` and `parser` are bound there.
    group.add_argument('--data-type', type=str, default='float32',
                       choices=['float32'], help='Choose the data type.')
    group.add_argument('--large-embedding', default=False, action='store_true',
                       help="set small or large embedding size")
    group.add_argument('--use-ipu-model', default=False, action='store_true',
                       help="use IPU model or not.")
    return parser


if __name__ == '__main__':
    # Build the CLI by chaining the per-section argument builders onto one parser.
    parser = argparse.ArgumentParser(
        description="CTR Model Training in Tensorflow")
    parser = add_model_arguments(parser)
    parser = add_dataset_arguments(parser)
    parser = add_training_arguments(parser)
    parser = logger.add_arguments(parser)
    # parse_known_args tolerates flags this parser does not define;
    # anything unrecognized lands in `unknown` and is otherwise ignored.
    args, unknown = parser.parse_known_args()
    args = vars(args)  # downstream code indexes the settings as a plain dict
    logger.print_setting(args)
    setup_logger(logging.INFO, tf_log)
    inference(args)
        # NOTE(review): continuation of an add_argument(...) call begun above
        # this view (the flag name and remaining kwargs are outside the chunk).
        help="Replicate graph over N workers to increase batch to batch-size*N"
    )
    group.add_argument('--model-path', type=str,
                       default='./dnn_save_path/ckpt_noshuffDIEN3',
                       help='Place to store and restore model')
    group.add_argument('--use-ipu-model', default=False, action='store_true',
                       help="use IPU model or not.")
    group.add_argument('--use-ipu-emb', default=False, action='store_true',
                       help="Use host embeddig or put embedding on ipu.")
    return parser


if __name__ == '__main__':
    # add_help=False — presumably because one of the chained builders below
    # supplies its own -h/--help; TODO confirm against those builders.
    parser = argparse.ArgumentParser(
        description="CTR Model Training in Tensorflow", add_help=False)
    parser = add_model_arguments(parser)
    parser = add_dataset_arguments(parser)
    parser = add_training_arguments(parser)
    parser = logger.add_arguments(parser)
    args, _ = parser.parse_known_args()  # unrecognized flags are discarded
    args = vars(args)
    logger.print_setting(args, is_dien=False, is_training=False)
    setup_logger(logging.DEBUG, tf_log, name='dien_log.txt')
    inference(args)
def evaluate(args):
    """Evaluate a saved ActorCritic checkpoint over a fixed number of episodes.

    Loads the checkpoint at ``args['load_file']``, rolls out the policy in the
    environment named by ``args['env']`` for ``args['num_episodes']`` episodes,
    logs per-episode returns, saves the raw return/success arrays to
    ``evaluation_statistics.pth`` inside a fresh timestamped output directory,
    and prints/returns summary statistics.

    Args:
        args: dict of settings. Keys read here: load_file, output_directory,
            env, gpu_ids, seed, render_video, model_name, stack_frames,
            load_best, num_episodes.

    Returns:
        dict with mean/std/min/max of episodic returns and successes, the
        success count and episode total, plus the raw numpy arrays.
    """
    start_time = time.time()
    torch.set_default_tensor_type('torch.FloatTensor')
    # Map all storages to CPU so a GPU-saved checkpoint loads anywhere.
    pthfile = torch.load(args['load_file'],
                         map_location=lambda storage, loc: storage.cpu())

    # Create a unique timestamped output directory next to the checkpoint.
    output_dir = os.path.join(
        os.path.dirname(args['load_file']), args['output_directory'],
        os.path.split(args['env'])[1] + 'evaluation-' +
        datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S.%f"))
    try:
        os.makedirs(output_dir)
    except OSError:
        # Only swallow the error when the directory already exists.
        if not os.path.isdir(output_dir):
            raise
    print('saving to: ' + output_dir + '/')

    start_log_setup = time.time()
    log = {}
    setup_logger('test.log', r'{0}/test.log'.format(output_dir))
    log['test.log'] = logging.getLogger('test.log')
    end_log_setup = time.time()
    print('single evaluate log setup: %d' % (end_log_setup - start_log_setup))

    # Seed everything; the last configured GPU id is used (< 0 means CPU).
    gpu_id = args['gpu_ids'][-1]
    torch.manual_seed(args['seed'])
    npr.seed(args['seed'] + 1)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args['seed'])

    # Record the full configuration in the log for reproducibility.
    for k in args.keys():
        log['test.log'].info('{0}: {1}'.format(k, args[k]))

    env = create_env(args['env'], args)
    player = Agent(None, env, args, None)
    # Wrap the environment so that it saves a video
    if args['render_video']:
        player.env = gym.wrappers.Monitor(player.env, output_dir, force=True)

    start_model = time.time()
    # The model class is resolved dynamically from args['model_name'].
    AC = importlib.import_module(args['model_name'])
    player.model = AC.ActorCritic(env.observation_space, env.action_space,
                                  args['stack_frames'], args)
    player.gpu_id = gpu_id
    if gpu_id >= 0:
        with torch.cuda.device(gpu_id):
            player.model = player.model.cuda()
    if args['load_best']:
        player.model.load_state_dict(pthfile['best_state_dict'])
    else:
        player.model.load_state_dict(pthfile['state_dict'])
    player.model.eval()
    end_model = time.time()
    print('single evaluate model setup time: %d' % (end_model - start_model))

    # Roll out the policy; keep track of per-episode returns.
    all_episode_returns = []
    for _ in range(args['num_episodes']):
        player.state, player.info = player.env.reset()
        player.state = torch.from_numpy(player.state).float()
        if gpu_id >= 0:
            with torch.cuda.device(gpu_id):
                player.state = player.state.cuda()
        player.eps_len = 0
        reward_sum = 0
        while True:
            player.action_test()
            reward_sum += player.reward
            if player.done:
                all_episode_returns.append(reward_sum)
                log['test.log'].info(
                    "Episode_length, {0}, reward_sum, {1}".format(
                        player.eps_len, reward_sum))
                break
    end_episodes = time.time()
    print('single evaluate time for %d episodes: %d' %
          (args['num_episodes'], end_episodes - end_model))
    # %f (not %d) so fractional seconds per episode are not truncated,
    # matching the overall-seconds-per-episode print below.
    print('single evaluate seconds per episode: %f' %
          ((end_episodes - end_model) / args['num_episodes']))

    all_episode_returns = np.array(all_episode_returns)
    # An episode counts as a "success" when its return exceeds 300.
    # NOTE(review): the 300.0 threshold looks environment-specific — confirm.
    all_episode_successes = np.array(all_episode_returns > 300.,
                                     dtype=np.float32)
    evaluation_statistics = {
        'Mean Return': np.mean(all_episode_returns),
        'Std Return': np.std(all_episode_returns),
        'Min Return': np.min(all_episode_returns),
        'Max Return': np.max(all_episode_returns),
        'Mean Success': np.mean(all_episode_successes),
        'Number Successes': np.sum(all_episode_successes),
        'Number Total': args['num_episodes'],
        'Std Success': np.std(all_episode_successes),
        'Min Success': np.min(all_episode_successes),
        'Max Success': np.max(all_episode_successes),
        'all_episode_returns': all_episode_returns,
        'all_episode_successes': all_episode_successes,
    }

    # Save raw data to a file
    torch.save(
        {
            'all_episode_returns': all_episode_returns,
            'all_episode_successes': all_episode_successes,
        }, os.path.join(output_dir, 'evaluation_statistics.pth'))

    # Reuse the statistics computed above instead of recomputing them; the
    # format strings are split with implicit concatenation (no backslash
    # continuation inside the literal, which leaked stray spaces into output).
    print('Average Episodic Return: \n\tmean: {0}\n\tstd: {1}\n\tmin: {2}'
          '\n\tmax: {3}'.format(evaluation_statistics['Mean Return'],
                                evaluation_statistics['Std Return'],
                                evaluation_statistics['Min Return'],
                                evaluation_statistics['Max Return']))
    print('Average Episodic Success: \n\tmean: {0} ({1}/{2})\n\tstd: {3}'
          '\n\tmin: {4}\n\tmax: {5}'.format(
              evaluation_statistics['Mean Success'],
              evaluation_statistics['Number Successes'],
              args['num_episodes'],
              evaluation_statistics['Std Success'],
              evaluation_statistics['Min Success'],
              evaluation_statistics['Max Success']))

    # Shut down logging system and close open file handles
    start_log_shutdown = time.time()
    logging.shutdown()
    end_time = time.time()
    print('single evaluate log shutdown: %d' % (end_time - start_log_shutdown))
    print('single evaluate total time for %d episodes: %d' %
          (args['num_episodes'], end_time - start_time))
    print('single evaluate overall seconds per episode: %f' %
          ((end_time - start_time) / args['num_episodes']))
    return evaluation_statistics
def test(args, shared_model):
    """Run the evaluation loop of an A3C-style trainer.

    Repeatedly copies the latest weights from ``shared_model`` whenever an
    episode finishes, plays one episode greedily via ``player.action_test()``,
    logs the episode return, plots the score history every 5 episodes, and
    (optionally) checkpoints the best-scoring weights. Runs forever — the
    ``while True`` loop below has no exit.

    Args:
        args: namespace of settings (env, gpu_ids, seed, log_dir, save_prefix,
            model_name, stack_frames, save_max, save_model_dir, ...).
        shared_model: model whose state_dict is periodically copied into the
            local evaluation model.
    """
    ptitle('Test Agent')
    gpu_id = args.gpu_ids[-1]  # last configured GPU; < 0 means CPU
    log = {}
    setup_logger(
        '{}_log'.format(args.env),
        r'{0}{1}{2}_log'.format(args.log_dir, args.save_prefix, args.env))
    log['{}_log'.format(args.env)] = logging.getLogger('{}_log'.format(
        args.env))
    # Record the full configuration in the log for reproducibility.
    d_args = vars(args)
    for k in d_args.keys():
        log['{}_log'.format(args.env)].info('{0}: {1}'.format(k, d_args[k]))
    torch.manual_seed(args.seed)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed)
    env = create_env(args.env, args)
    reward_sum = 0
    start_time = time.time()
    num_tests = 0
    reward_total_sum = 0
    player = Agent(None, None, env, args, None)
    player.gpu_id = gpu_id
    # Model class is resolved dynamically from args.model_name.
    AC = importlib.import_module(args.model_name)
    player.model = AC.ActorCritic(env.observation_space, env.action_space,
                                  args.stack_frames, args)
    player.state, player.info = player.env.reset()
    player.state = torch.from_numpy(player.state).float()
    if gpu_id >= 0:
        with torch.cuda.device(gpu_id):
            player.model = player.model.cuda()
            player.state = player.state.cuda()
    player.model.eval()
    episode_count = 0
    all_scores = []
    max_score = 0
    while True:  # runs until the process is killed
        if player.done:
            # Episode boundary: pull the freshest weights from the trainer.
            episode_count += 1
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.model.load_state_dict(shared_model.state_dict())
            else:
                player.model.load_state_dict(shared_model.state_dict())
        player.action_test()
        reward_sum += player.reward
        if player.done:
            num_tests += 1
            reward_total_sum += reward_sum
            reward_mean = reward_total_sum / num_tests
            log['{}_log'.format(args.env)].info(
                "Time {0}, episode reward {1}, episode length {2}, reward mean {3:.4f}"
                .format(
                    time.strftime("%Hh %Mm %Ss",
                                  time.gmtime(time.time() - start_time)),
                    reward_sum, player.eps_len, reward_mean))
            # Plot scores every 5 episodes
            all_scores.append(reward_sum)
            if (episode_count % 5 == 0):
                plt.clf()
                plt.plot(range(len(all_scores)), all_scores)
                plt.title('Test Episode Returns')
                plt.xlabel('Test Episode')
                plt.ylabel('Return')
                plt.savefig('{0}{1}{2}.png'.format(args.log_dir,
                                                   args.save_prefix, args.env))
            # Checkpoint whenever the current episode ties or beats the best.
            if args.save_max and reward_sum >= max_score:
                max_score = reward_sum
                if gpu_id >= 0:
                    with torch.cuda.device(gpu_id):
                        state_to_save = player.model.state_dict()
                        torch.save(
                            state_to_save,
                            '{0}{1}{2}.dat'.format(args.save_model_dir,
                                                   args.save_prefix, args.env))
                else:
                    state_to_save = player.model.state_dict()
                    torch.save(
                        state_to_save,
                        '{0}{1}{2}.dat'.format(args.save_model_dir,
                                               args.save_prefix, args.env))
            # Reset for the next episode; sleep 60 s between test episodes
            # (presumably to let the trainer make progress — confirm).
            reward_sum = 0
            player.eps_len = 0
            state, player.info = player.env.reset()
            time.sleep(60)
            player.state = torch.from_numpy(state).float()
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()