@contextmanager
def timed(msg):
    print(colorize(msg, color='magenta'))
    tstart = time.time()
    yield
    print(colorize("done in %.3f seconds" % (time.time() - tstart),
                   color='magenta'))
def timed(msg): print(colorize(msg, color="red")) tstart = time.time() yield print( colorize(msg + " done in %.3f seconds" % (time.time() - tstart), color="red"))
@contextmanager
def timed(msg):
    if rank == 0:
        print(colorize(msg, color='magenta'))
        tstart = time.time()
        yield
        print(colorize("done in %.3f seconds" % (time.time() - tstart),
                       color='magenta'))
    else:
        yield
@contextmanager
def timed(self, msg):
    if self.rank == 0:  ##################################
        print(colorize(msg, color='magenta'))
        tstart = time.time()
        yield
        print(colorize("done in %.3f seconds" % (time.time() - tstart),
                       color='magenta'))
    else:
        yield
@contextmanager
def timed(msg, verbose=True):
    if verbose:
        print(colorize(msg, color='magenta'))
    tstart = time.time()
    yield
    if verbose:
        print(colorize("done in %.3f seconds" % (time.time() - tstart),
                       color='magenta'))
@contextmanager
def timed(msg):
    if rank == 0:
        print(colorize(msg, color='yellow'))
        tstart = time.time()
        yield
        print(colorize("done in %.3f seconds" % (time.time() - tstart),
                       color='blue'))
    else:
        yield
@contextmanager
def timed(msg):
    if rank == 0:
        print(colorize(msg, color='magenta'))
        start_time = time.time()
        yield
        print(colorize("done in %.3f seconds" % (time.time() - start_time),
                       color='magenta'))
    else:
        yield
@contextmanager
def timed(self, msg):
    if self._is_chef:
        logger.info(colorize(msg, color='magenta'))
        tstart = time.time()
        yield
        logger.info(colorize("done in %.3f seconds" % (time.time() - tstart),
                             color='magenta'))
    else:
        yield
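# ---------------------------------------------------------------------------
# Usage sketch (illustration, not from the sources above): every `timed`
# variant is a generator-based context manager wrapped with
# contextlib.contextmanager and entered via `with`. Minimal self-contained
# version -- the `colorize` stub below stands in for baselines.common.colorize:
import time
from contextlib import contextmanager

def colorize(msg, color='magenta'):
    # Stub: the real baselines helper wraps msg in ANSI color escape codes.
    return msg

@contextmanager
def timed(msg):
    print(colorize(msg, color='magenta'))
    tstart = time.time()
    yield  # the body of the `with` block executes here
    print(colorize("done in %.3f seconds" % (time.time() - tstart),
                   color='magenta'))

# Example: prints "sleeping", runs the block, then prints the elapsed time.
with timed("sleeping"):
    time.sleep(0.1)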
elif args.command == 'view':
    from baselines.common.sacred_utils import load_runs, filter_runs
    from baselines.common import colorize
    assert args.name is not None, "Provide an experiment name."
    assert args.dir is not None, "Provide a directory for experiment."
    rule = re.compile(args.name + '_*')
    # Get all screens
    all_active_screens = 0
    for s in list_screens():
        if rule.match(s.name):
            all_active_screens += 1
    # Load runs to get active ones
    runs = load_runs(args.dir)
    running_runs = filter_runs({'run.status': 'RUNNING'}, runs)
    print(colorize("==========================================", color='red'))
    max_eta, max_duration = None, None
    for key in running_runs.keys():
        run = running_runs[key]
        print(colorize('Run:', color='blue'),
              "{0} ({1})".format(key, run['config']['env']))
        print("\t" + colorize("Steps:", color='blue') +
              "{0}/{1}".format(len(run['metrics']['EpRewMean']['steps']) + 1,
                               run['config']['max_iters']) +
              "\t\t" + colorize("Reward:", color='blue') +
              "{0}".format(run['metrics']['EpRewMean']['values'][-1]))
        completion = (len(run['metrics']['EpRewMean']['steps']) + 1) \
            / run['config']['max_iters']
        start_time = datetime.strptime(run['run']['start_time'],
                                       '%Y-%m-%dT%H:%M:%S.%f')
        duration = datetime.utcnow() - start_time
elif args.command == 'view':
    from baselines.common.sacred_utils import load_runs, filter_runs
    from baselines.common import colorize
    assert args.name is not None, "Provide an experiment name."
    assert args.dir is not None, "Provide a directory for experiment."
    rule = re.compile(args.name + '_*')
    # Get all screens
    all_active_screens = 0
    for s in list_screens():
        if rule.match(s.name):
            all_active_screens += 1
    # Load runs to get active ones
    runs = load_runs(args.dir)
    running_runs = filter_runs({'run.status': 'RUNNING'}, runs)
    print(colorize("==========================================", color='red'))
    max_eta, max_duration = None, None
    for key in running_runs.keys():
        run = running_runs[key]
        print(colorize('Run:', color='blue'),
              "{0} ({1})".format(key, run['config']['env']))
        print("\t" + colorize("Steps:", color='blue') +
              "{0}/{1}".format(len(run['metrics']['EpRewMean']['steps']) + 1,
                               run['config']['max_iters']) +
              "\t\t" + colorize("Reward:", color='blue') +
              "{0}".format(run['metrics']['EpRewMean']['values'][-1]) +
              "\t\t" + colorize("Seed:", color='blue') +
              "{0}".format(run['config']['seed']) +
              "\t\t" + colorize("Delta:", color='blue') +
              "{0}".format(run['config']['delta']))
        completion = (len(run['metrics']['EpRewMean']['steps']) + 1) \
            / run['config']['max_iters']
        start_time = datetime.strptime(run['run']['start_time'],
                                       '%Y-%m-%dT%H:%M:%S.%f')
        duration = datetime.utcnow() - start_time
        eta = duration * (1 - completion) / completion
        max_eta = max(eta, max_eta) if max_eta is not None else eta
        max_duration = max(duration, max_duration) \
            if max_duration is not None else duration
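# ---------------------------------------------------------------------------
# ETA sketch (illustration, not from the source): the `view` command estimates
# time-to-completion by scaling the elapsed wall-clock time by the remaining
# fraction of iterations, i.e. eta = duration * (1 - completion) / completion.
from datetime import timedelta

completion = 400 / 1000                    # iterations finished / max_iters
duration = timedelta(hours=2)              # wall-clock time elapsed so far
eta = duration * (1 - completion) / completion
print(eta)                                 # 3:00:00 -> about three hours left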
def best_of_grid(policy, grid_size_1d, mu_min, mu_max,
                 grid_dimension, trainable_std,
                 rho_init, old_rhos_list,
                 iters_so_far, mask_iters,
                 set_parameters, set_parameters_old,
                 delta_cst, renyi_components_sum,
                 evaluate_behav, den_mise,
                 evaluate_behav_last_sample,
                 evaluate_bound, evaluate_renyi, evaluate_roba,
                 filename, plot_bound, plot_ess_profile,
                 delta_t, new_grid):
    # Compute MISE's denominator and Renyi bound
    # evaluate the last behav over all samples and add to the denominator
    set_parameters_old(old_rhos_list[-1])
    behav_t = evaluate_behav()
    den_mise = (den_mise + np.exp(behav_t)) * mask_iters
    # print(den_mise)
    for i in range(len(old_rhos_list) - 1):
        # evaluate all the behavs (except the last) over the last sample
        set_parameters_old(old_rhos_list[i])
        behav = evaluate_behav_last_sample()
        # print('behhaaaaavvvv', np.exp(behav))
        den_mise[iters_so_far - 1] = den_mise[iters_so_far - 1] + np.exp(behav)

    # Compute the log of MISE's denominator
    eps = 1e-24  # to avoid inf weights and nan bound
    den_mise_it = (den_mise + eps) / iters_so_far
    den_mise_log = np.log(den_mise_it) * mask_iters

    # Calculate the grid of parameters to evaluate
    rho_grid, gain_grid, xyz = \
        generate_grid(grid_size_1d, grid_dimension, trainable_std,
                      mu_min=mu_min, mu_max=mu_max)
    logger.record_tabular("GridSize", len(rho_grid))

    # Evaluate the set of parameters and retain the best one
    bound = []
    mise = []
    bonus = []
    ess_d2 = []
    ess_miw = []
    bound_best = 0
    renyi_bound_best = 0
    # print('rho_grid', rho_grid)
    if new_grid and delta_t == 'continuous':
        print(colorize('computing renyi bound from scratch', color='magenta'))
    for i, rho in enumerate(rho_grid):
        set_parameters(rho)
        if new_grid and delta_t == 'continuous':
            for old_rho in old_rhos_list:
                set_parameters_old(old_rho)
                renyi_component = evaluate_renyi()
                renyi_components_sum[i] += 1 / renyi_component
            renyi_bound = 1 / renyi_components_sum[i]
        else:
            set_parameters_old(old_rhos_list[-1])
            renyi_component = evaluate_renyi()
            renyi_components_sum[i] += 1 / renyi_component
            renyi_bound = 1 / renyi_components_sum[i]
        bound_rho = evaluate_bound(den_mise_log, renyi_bound)
        bound.append(bound_rho)
        if bound_rho > bound_best:
            bound_best = bound_rho
            rho_best = rho
            renyi_bound_best = renyi_bound
        if plot_bound == 1:
            # Evaluate bounds' components for plotting
            mise_rho, bonus_rho, ess_d2_rho, ess_miw_rho = \
                evaluate_roba(den_mise_log, renyi_bound)
            mise.append(mise_rho)
            bonus.append(bonus_rho)
            ess_d2.append(ess_d2_rho)
            ess_miw.append(ess_miw_rho)

    # Calculate improvement
    # set_parameters(rho_init)
    # improvement = bound_best - evaluate_bound(den_mise_log, renyi_bound)
    improvement = 0

    # Plot the profile of the bound and its components
    if plot_bound == 2:
        bound = np.array(bound).reshape((grid_size_1d, grid_size_1d))
        # mise = np.array(mise).reshape((grid_size_std, grid_size))
        plot3D_bound_profile(xyz[0], xyz[1], bound, rho_best,
                             bound_best, iters_so_far, filename)
    elif plot_bound == 1:
        plot_bound_profile(gain_grid[0], bound, mise, bonus, rho_best[0],
                           bound_best, iters_so_far, filename)
        # plot_ess(gain_grid, ess_d2, iters_so_far, 'd2_' + filename)
        # plot_ess(gain_grid, ess_miw, iters_so_far, 'miw_' + filename)

    return rho_best, improvement, den_mise_log, den_mise, \
        renyi_components_sum, renyi_bound_best
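# ---------------------------------------------------------------------------
# Illustration (not from the source): `best_of_grid` accumulates the Renyi
# bound as a harmonic-style sum -- each behavioral policy contributes
# 1 / renyi_component, and the bound is the reciprocal of the running total.
import numpy as np

renyi_components = np.array([4.0, 2.0, 4.0])           # hypothetical per-policy terms
renyi_components_sum = np.sum(1.0 / renyi_components)  # 0.25 + 0.5 + 0.25 = 1.0
renyi_bound = 1.0 / renyi_components_sum               # reciprocal of the sum -> 1.0
print(renyi_bound)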
def __init__(self, agent, network, nsteps, rho,
             max_kl, ent_coef, vf_stepsize, vf_iters,
             cg_damping, cg_iters, seed, load_path,
             **network_kwargs):
    super(AgentModel, self).__init__(name='MATRPOModel')
    self.agent = agent
    self.nsteps = nsteps
    self.rho = rho
    self.max_kl = max_kl
    self.ent_coef = ent_coef
    self.cg_damping = cg_damping
    self.cg_iters = cg_iters
    self.vf_stepsize = vf_stepsize
    self.vf_iters = vf_iters
    set_global_seeds(seed)
    np.set_printoptions(precision=3)

    if MPI is not None:
        self.nworkers = MPI.COMM_WORLD.Get_size()
        self.rank = MPI.COMM_WORLD.Get_rank()
    else:
        self.nworkers = 1
        self.rank = 0

    # Setup losses and stuff
    # ----------------------------------------
    ob_space = agent.observation_space
    ac_space = agent.action_space

    with tf.name_scope(agent.name):
        if isinstance(network, str):
            network = get_network_builder(network)(**network_kwargs)
        with tf.name_scope("pi"):
            pi_policy_network = network(ob_space.shape)
            pi_value_network = network(ob_space.shape)
            self.pi = pi = PolicyWithValue(ac_space, pi_policy_network,
                                           pi_value_network)
        with tf.name_scope("oldpi"):
            old_pi_policy_network = network(ob_space.shape)
            old_pi_value_network = network(ob_space.shape)
            self.oldpi = oldpi = PolicyWithValue(ac_space, old_pi_policy_network,
                                                 old_pi_value_network)

    self.comm_matrix = agent.comm_matrix.copy()
    self.estimates = np.zeros([agent.nmates, nsteps], dtype=np.float32)
    self.multipliers = np.ones([self.agent.nmates, self.nsteps]).astype(np.float32)

    pi_var_list = pi_policy_network.trainable_variables + \
        list(pi.pdtype.trainable_variables)
    old_pi_var_list = old_pi_policy_network.trainable_variables + \
        list(oldpi.pdtype.trainable_variables)
    vf_var_list = pi_value_network.trainable_variables + \
        pi.value_fc.trainable_variables
    old_vf_var_list = old_pi_value_network.trainable_variables + \
        oldpi.value_fc.trainable_variables

    self.pi_var_list = pi_var_list
    self.old_pi_var_list = old_pi_var_list
    self.vf_var_list = vf_var_list
    self.old_vf_var_list = old_vf_var_list

    if load_path is not None:
        load_path = osp.expanduser(load_path)
        ckpt = tf.train.Checkpoint(model=pi)
        load_path = load_path + '/agent_{}'.format(self.agent.id)
        manager = tf.train.CheckpointManager(ckpt, load_path, max_to_keep=None)
        ckpt.restore(manager.latest_checkpoint)
        print(colorize('Agent{}\'s Model restored!'.format(self.agent.id),
                       color='magenta'))

    self.vfadam = MpiAdam(vf_var_list)
    self.get_flat = U.GetFlat(pi_var_list)
    self.set_from_flat = U.SetFromFlat(pi_var_list)
    self.loss_names = ["Lagrange", "surrgain", "sync",
                       "meankl", "entloss", "entropy"]
    self.shapes = [var.get_shape().as_list() for var in pi_var_list]
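# ---------------------------------------------------------------------------
# Checkpoint sketch (illustration, not from the source): the load_path branch
# above follows the standard TF2 object-based checkpointing pattern. Minimal
# standalone version with a toy module in place of PolicyWithValue:
import tensorflow as tf

model = tf.keras.layers.Dense(4)                       # toy stand-in network
ckpt = tf.train.Checkpoint(model=model)
manager = tf.train.CheckpointManager(ckpt, './ckpts/agent_0', max_to_keep=None)
manager.save()                                         # write a checkpoint
ckpt.restore(manager.latest_checkpoint)                # restore the latest one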