def __init__(self, env, networks, scope=None, n_itr=5000, start_itr=0,
             batch_size=32, max_path_length=200, discount=0.99, plot=False,
             pause_for_plot=False, center_adv=True, max_epsilon=1,
             min_epsilon=0.01, store_paths=False, whole_paths=True,
             sampler_cls=None, sampler_args=None, force_batch_sampler=True,
             pre_trained_size=10000, target_network_update=1000,
             save_param_update=125, **kwargs):
    """
    :param env: Environment to train in.
    :param networks: Dict of Q-networks; must contain the keys
        'q_network' (used as the policy) and 'target_q_network'.
    :param scope: Scope for identifying the algorithm. Must be specified if
        running multiple algorithms simultaneously, each using different
        environments and policies.
    :param n_itr: Number of iterations.
    :param start_itr: Starting iteration.
    :param batch_size: Number of samples per iteration.
    :param max_path_length: Maximum length of a single rollout.
    :param discount: Discount factor.
    :param plot: Plot evaluation run after each iteration.
    :param pause_for_plot: Whether to pause before continuing when plotting.
    :param center_adv: Whether to center advantage estimates.
    :param max_epsilon: Upper epsilon bound (presumably the initial value of
        an epsilon-greedy schedule — confirm against the exploration code).
    :param min_epsilon: Lower epsilon bound of the same schedule.
    :param store_paths: Whether to save all paths data to the snapshot.
    :param whole_paths: Whether to use whole paths.
    :param sampler_cls: Sampler class; defaults to ExpReplayMASampler.
    :param sampler_args: Extra keyword arguments for the sampler constructor.
    :param force_batch_sampler: Whether to force use of the batch sampler.
    :param pre_trained_size: Size threshold used before training starts
        (likely the replay-buffer warm-up size — confirm in the sampler).
    :param target_network_update: Interval controlling target-network syncs.
    :param save_param_update: Interval controlling parameter snapshots.
    :param kwargs: May contain 'ma_mode' (multi-agent mode, defaults to
        'centralized'); remaining entries are ignored.
    :return: None
    """
    # Multi-agent mode is passed through **kwargs rather than a named param.
    self.ma_mode = kwargs.pop('ma_mode', 'centralized')
    self.env = env
    # The behaviour network doubles as the "policy"; a separate target
    # network is kept for stabilized Q-learning updates.
    self.policy = networks['q_network']
    self.target_policy = networks['target_q_network']
    self.scope = scope
    self.n_itr = n_itr
    self.start_itr = start_itr
    self.batch_size = batch_size
    self.max_path_length = max_path_length
    self.discount = discount
    self.plot = plot
    self.max_epsilon = max_epsilon
    self.min_epsilon = min_epsilon
    self.pause_for_plot = pause_for_plot
    self.center_adv = center_adv
    self.store_paths = store_paths
    self.whole_paths = whole_paths
    self.force_batch_sampler = force_batch_sampler
    # Diagnostics updated during optimization; initialized to zero here.
    self.loss_after = 0
    self.mean_kl = 0
    self.max_kl = 0
    self.save_param_update = save_param_update
    self.target_network_update = target_network_update
    # TensorBoard-style logging handles, populated lazily elsewhere.
    self.writer = None
    self.write_op = None
    self.pre_trained_size = pre_trained_size
    self.total_episodic_rewards = None
    # Summary placeholders/ops, populated lazily elsewhere.
    self.s_loss = None
    self.s_avg_rewards = None
    self.s_total_rewards = None
    # Default to the experience-replay multi-agent sampler; avoid a mutable
    # default argument by resolving sampler_args here.
    if sampler_cls is None:
        sampler_cls = ExpReplayMASampler
    if sampler_args is None:
        sampler_args = dict()
    self.sampler = sampler_cls(algo=self, **sampler_args)
    if plot:
        from rllab.plotter import plotter
        plotter.init_worker()
    self.init_opt()
def start_worker(self):
    """Start the sampler's worker processes; also spin up the plotter
    worker when plotting is enabled."""
    self.sampler.start_worker()
    if not self.plot:
        return
    plotter.init_worker()
    # plotter.init_plot(self.env, self.policy)  # left disabled, as before