def get_benchmark(self, benchmark_hash, force=True):
    conn, cursor = self.connect_db()
    vars = (benchmark_hash,)
    cursor.execute(
        "SELECT experiment_hash, benchmark_hash, config_hash, metadata, config, results "
        " FROM experiments WHERE benchmark_hash=?", vars)
    results = cursor.fetchall()
    self.close_db()

    if len(results) == 0:
        logging.debug("Did not find benchmark_hash {} in local db.".format(benchmark_hash))
        return None

    benchmark_data = BenchmarkData([result_to_experiment(result) for result in results])
    return benchmark_data
def save_benchmark_file(self, benchmark_file):
    """
    Save benchmark from file to database.

    Args:
        benchmark_file: file path to benchmark data file.

    Returns:
        dict containing information on save status (see `save_benchmark`)
    """
    if not os.path.exists(benchmark_file):
        raise OSError("No such file: {}".format(benchmark_file))

    with open(benchmark_file, 'rb') as fp:
        benchmark_data = BenchmarkData.from_file(fp)

    return self.save_benchmark(benchmark_data)
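# Usage sketch (not from the original source): how the lookup and save helpers above might be
# combined on a database instance. LocalDatabase is the concrete class used elsewhere in this
# codebase; its constructor arguments here are illustrative assumptions.
#
#     db = LocalDatabase(db_file='benchmarks.db')            # hypothetical constructor argument
#     status = db.save_benchmark_file('my_benchmark.pkl')    # save-status dict (see save_benchmark)
#     for benchmark_hash in status['benchmark_hashes']:
#         benchmark_data = db.get_benchmark(benchmark_hash)  # BenchmarkData, or None if not found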
class BenchmarkRunner(object):
    def __init__(self, config=None, config_folder=None, output_folder='/tmp'):
        self.config = config
        self.config_folder = config_folder
        self.output_folder = output_folder

        self.history_data = None
        self.load_model_file = None

        self.save_history_file = None
        self.save_history_episodes = 0
        self.save_model_file = None
        self.save_model_episodes = 0

        self.current_run_results = None
        self.report_episodes = 10
        self.progress_bar = None

        self.limit_by_episodes = True  # if False, the run is limited by timesteps

        self.environment_domain = 'user'
        self.environment_name = None

    def load_config(self, filename):
        """
        Load configuration. Either give a file name from the config_folder (with or without
        file suffix), supply a full file path, or pass a configuration dict directly.

        Args:
            filename: file name, full file path, or dict object

        Returns:
            Boolean
        """
        if isinstance(filename, dict):
            config = filename
        else:
            config = load_config_file(filename, config_folder=self.config_folder)

        if not config:
            return False

        if self.config:
            logging.warning("Overwriting existing configuration")

        self.config = config
        return True

    def set_environment(self, environment, *args, **kwargs):
        """
        Set environment and store as callback.

        Args:
            environment: Environment object
            *args: arguments to pass to the environment class constructor
            **kwargs: keyword arguments to pass to the environment class constructor

        Returns:

        """
        raise NotImplementedError

    def make_environment(self):
        """
        Create an environment instance.

        Returns:
            environment
        """
        raise NotImplementedError

    def run_experiment(self, environment, experiment_num=0):
        """
        Run learning for a single experiment.

        Args:
            environment: environment
            experiment_num: experiment number

        Returns:

        """
        raise NotImplementedError

    def load_history(self, history_file):
        """
        Load benchmark history from file.

        Args:
            history_file: path to history file

        Returns:

        """
        logging.info("Loading benchmark history data from {}".format(history_file))
        with open(os.path.join(os.getcwd(), history_file), "rb") as fp:
            self.history_data = pickle.load(fp)

    def load_model(self, model_file):
        """
        Load model from file.

        Args:
            model_file: path to model file

        Returns:

        """
        logging.info("Loading model data from {}".format(model_file))
        self.load_model_file = model_file

    def episode_finished(self, results, runner_id):
        """
        Callback that is called from the runner after each finished episode. Outputs result
        summaries and saves history.

        Args:
            results: results object (or TensorForce runner)
            runner_id: runner id for distributed execution

        Returns:
            Boolean indicating whether to continue the run or not.
""" if self.progress_bar: if self.limit_by_episodes: self.progress_bar.update(1) else: self.progress_bar.update(results.episode_timestep) self.progress_bar.set_postfix( OrderedDict([ ('R', '{:8.0f}'.format(results.episode_rewards[-1])), ('AR100', '{:8.2f}'.format( np.mean(results.episode_rewards[-100:]))), ('AR500', '{:8.2f}'.format(np.mean(results.episode_rewards[-500:]))) ])) else: if results.episode % self.report_episodes == 0: logging.info( "Finished episode {ep} after {ts} timesteps".format( ep=results.episode, ts=results.episode_timestep)) logging.info("Episode reward: {}".format( results.episode_rewards[-1])) logging.info("Average of last 500 rewards: {:.2f}".format( np.mean(results.episode_rewards[-500:]))) logging.info("Average of last 100 rewards: {:.2f}".format( np.mean(results.episode_rewards[-100:]))) if self.save_history_file and self.save_history_episodes > 0: if results.episode % self.save_history_episodes == 0: logging.debug("Saving benchmark history to {}".format( self.save_history_file)) history_data = dict( episode=results.episode, timestep=results.episode_timestep, episode_rewards=copy(results.episode_rewards), episode_timesteps=copy(results.episode_timesteps), episode_end_times=copy(results.episode_times)) with open(self.save_history_file, 'wb') as fp: pickle.dump(history_data, fp) return True def run(self, experiments=1, report_episodes=10, save_history_file=None, save_history_episodes=0, save_model_file=None, save_model_episodes=0): self.report_episodes = report_episodes self.save_history_file = save_history_file self.save_history_episodes = save_history_episodes self.save_model_file = save_model_file self.save_model_episodes = save_model_episodes self.current_run_results = BenchmarkData() max_episodes = self.config.get('max_episodes') max_timesteps = self.config.get('max_timesteps') assert bool(max_episodes) != bool( max_timesteps ), 'Please limit either by episodes or by timesteps, not both' assert bool(max_episodes) or bool( max_timesteps ), 'Please give a time limit for the run (episodes or timesteps)' if max_episodes: self.limit_by_episodes = True total = int(max_episodes) else: self.limit_by_episodes = False total = int(max_timesteps) logging.info( "Running benchmark with {:d} experiments".format(experiments)) for i in xrange(experiments): config = copy(self.config) environment = self.make_environment() logging.info("Starting experiment {:d}".format(i + 1)) with tqdm(total=total, desc='Experiment {:d}'.format(i + 1)) as self.progress_bar: experiment_start_time = int(time.time()) results = self.run_experiment(environment, i) experiment_end_time = int(time.time()) logging.info("Learning finished.") experiment_data = dict( results=results, metadata=dict(agent=config['type'], episodes=max_episodes, timesteps=max_timesteps, max_episode_timesteps=config.get( 'max_episode_timesteps', 0), environment_domain=self.environment_domain, environment_name=self.environment_name, tensorforce_version=tensorforce_version, tensorflow_version=tensorflow_version, start_time=experiment_start_time, end_time=experiment_end_time), config=dict(config) # make sure this is a dict ) self.current_run_results.append(experiment_data) return self.current_run_results def save_results_db(self, db): """ Save results to database. Args: db: `Database` object Returns: dict containing returned information on save status """ benchmark_data = self.current_run_results return db.save_benchmark(benchmark_data) def save_results_file(self, output_file, append=False, force=False): """ Save results to file. 
        Args:
            output_file: path to output file (relative to `self.output_folder`, or absolute path)
            append: Boolean indicating whether to append data if output file exists
            force: Boolean indicating whether to overwrite data if output file exists (append has preference)

        Returns:
            boolean
        """
        output_file_path = os.path.join(self.output_folder, output_file)

        benchmark_data = self.current_run_results

        if os.path.exists(output_file_path):
            if not append and not force:
                logging.error("Output file exists and should not be appended to or overwritten. Aborting.")
                return False
            if append:
                logging.info("Loading data from existing output file")
                with open(output_file_path, 'rb') as fp:
                    old_benchmark_data = pickle.load(fp)
                benchmark_data = old_benchmark_data + self.current_run_results
            elif force:
                logging.warning("Overwriting existing benchmark file.")

        logging.info("Saving benchmark data to {}".format(output_file_path))
        with open(output_file_path, 'wb') as fp:
            pickle.dump(benchmark_data, fp)

        return True
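# Usage sketch (assumption, not part of the original source): BenchmarkRunner is abstract;
# a concrete runner must implement set_environment(), make_environment() and run_experiment().
# The class name, agent type, environment class and file names below are illustrative only.
#
#     class MyRunner(BenchmarkRunner):
#         def set_environment(self, environment, *args, **kwargs):
#             # store a factory so make_environment() can build a fresh instance per experiment
#             self._environment_factory = lambda: environment(*args, **kwargs)
#
#         def make_environment(self):
#             return self._environment_factory()
#
#         def run_experiment(self, environment, experiment_num=0):
#             ...  # build agent from self.config, run episodes, call self.episode_finished()
#
#     runner = MyRunner(config=dict(type='ppo_agent', max_episodes=1000))
#     runner.set_environment(MyEnvironment)
#     runner.run(experiments=3, save_history_file='history.pkl', save_history_episodes=100)
#     runner.save_results_file('my_benchmark.pkl', force=True)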
def save_benchmark(self, benchmark_data):
    if not isinstance(benchmark_data, BenchmarkData):
        benchmark_data = BenchmarkData(benchmark_data)

    added_experiments = list()  # list of experiment ids that have been added to the database
    added_experiment_hashes = list()  # list of experiment hashes that have been added to the database
    benchmark_hashes = list()  # list of benchmark hashes (both added and duplicates)
    duplicate_experiments = list()  # list of experiment ids that already were in the database
    duplicate_experiment_hashes = list()  # list of experiment hashes that already were in the database

    vars = list()
    for i, experiment_data in enumerate(benchmark_data):
        experiment_hash, benchmark_hash, config_hash = experiment_data.hash()
        benchmark_hashes.append(benchmark_hash)

        # check if experiment already exists
        if self.get_experiment(experiment_hash):
            logging.warning("Experiment with hash {} already exists, ignoring.".format(experiment_hash))
            duplicate_experiments.append(i)
            duplicate_experiment_hashes.append(experiment_hash)
            continue

        added_experiments.append(i)
        added_experiment_hashes.append(experiment_hash)

        config = experiment_data.get('config', dict())
        metadata = experiment_data.get('metadata', dict())
        results = experiment_data.get('results', dict())

        vars.append((
            experiment_hash,
            config_hash,
            benchmark_hash,
            metadata.get('agent'),
            metadata.get('episodes'),
            metadata.get('max_timesteps'),
            metadata.get('environment_domain'),
            metadata.get('environment_name'),
            metadata.get('tensorforce_version'),
            metadata.get('tensorflow_version'),
            metadata.get('start_time', 0),
            metadata.get('end_time', 0),
            json.dumps(metadata, sort_keys=True),
            json.dumps(config, sort_keys=True),
            json.dumps(results, sort_keys=True)
        ))

    if len(vars) > 0:
        conn, cursor = self.connect_db()
        cursor.executemany(
            "INSERT INTO experiments (experiment_hash, config_hash, benchmark_hash, "
            "md_agent, md_episode, md_max_timesteps, md_environment_domain, "
            "md_environment_name, md_tensorforce_version, md_tensorflow_version, "
            "start_time, end_time, metadata, config, results) VALUES "
            "(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", vars)
        conn.commit()
        self.close_db()
    else:
        logging.warning("No rows inserted.")

    return dict(
        added_experiments=added_experiments,
        added_experiment_hashes=added_experiment_hashes,
        benchmark_hashes=benchmark_hashes,
        duplicate_experiments=duplicate_experiments,
        duplicate_experiment_hashes=duplicate_experiment_hashes
    )
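# For reference, the INSERT statement above implies an `experiments` table roughly like the
# following. This is a sketch reconstructed from the column list only; the exact column types
# and constraints in the real schema are an assumption.
#
#     CREATE TABLE IF NOT EXISTS experiments (
#         experiment_hash TEXT PRIMARY KEY,
#         config_hash TEXT,
#         benchmark_hash TEXT,
#         md_agent TEXT,
#         md_episode INTEGER,
#         md_max_timesteps INTEGER,
#         md_environment_domain TEXT,
#         md_environment_name TEXT,
#         md_tensorforce_version TEXT,
#         md_tensorflow_version TEXT,
#         start_time INTEGER,
#         end_time INTEGER,
#         metadata TEXT,
#         config TEXT,
#         results TEXT
#     )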
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument('-i', '--input', action='append', nargs=2,
                        metavar=('benchmark', 'name'),
                        help="Input file(s) or hashes")
    parser.add_argument('-o', '--output', default="output.png",
                        help="output file (image png)")
    parser.add_argument('-C', '--config-file', default=DEFAULT_CONFIG_FILE,
                        help="config file (for database configuration)")
    parser.add_argument('-E', '--show-episodes', action='store_true', default=False,
                        help="show rewards by episode number")
    parser.add_argument('-T', '--show-timesteps', action='store_true', default=False,
                        help="show rewards by global timestep")
    parser.add_argument('-S', '--show-seconds', action='store_true', default=False,
                        help="show rewards by (wallclock) seconds")

    args = parser.parse_args()

    if not args.input:
        raise ValueError("Please state at least one input file and name.")

    logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)

    config = load_config(args.config_file, default_config_file=DEFAULT_CONFIG_FILE)

    local_db = LocalDatabase(**config)

    plotter = ResultPlotter()

    # load input files into data dict
    for (benchmark_lookup, name) in args.input:
        logger.info("Loading {} ({})".format(benchmark_lookup, name))
        benchmark_data = BenchmarkData.from_file_or_hash(benchmark_lookup, db=local_db)
        plotter.add_benchmark(benchmark_data, name)

    num_plots = 0
    if args.show_episodes:
        num_plots += 1
    if args.show_timesteps:
        num_plots += 1
    if args.show_seconds:
        num_plots += 1

    if num_plots <= 0:
        logger.error("Please specify at least one plot type (-E, -T, or -S)")
        return

    max_row_length = 4
    if num_plots <= max_row_length:
        plot_rows = 1
        plot_cols = num_plots
    else:
        plot_rows = num_plots // max_row_length + 1
        plot_cols = max_row_length

    figure, axes = plt.subplots(ncols=plot_cols, nrows=plot_rows,
                                figsize=(plot_cols * 6, plot_rows * 6))

    if num_plots == 1:
        axes = [axes]

    ax_index = -1
    if args.show_episodes:
        ax_index += 1
        plot = plotter.plot_reward_by_episode(axes[ax_index])
        figure.add_subplot(plot)

    if args.show_timesteps:
        ax_index += 1
        plot = plotter.plot_reward_by_timestep(axes[ax_index])
        figure.add_subplot(plot)

    if args.show_seconds:
        ax_index += 1
        plot = plotter.plot_reward_by_second(axes[ax_index])
        figure.add_subplot(plot)

    plt.tight_layout()

    logger.info("Saving figure to {}".format(args.output))
    figure.savefig(args.output)

    return 0
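# Example invocation (illustrative; the script name, file names and hash are placeholders):
#
#     python plot_benchmarks.py \
#         -i my_benchmark.pkl "PPO run" \
#         -i 1a2b3c4d "Baseline (by benchmark hash)" \
#         -E -T -o comparison.png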