Example no. 1
def timed(msg):
    print(colorize(msg, color='magenta'))
    tstart = time.time()
    yield
    print(
        colorize("done in %.3f seconds" % (time.time() - tstart),
                 color='magenta'))
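All of the timed helpers in these examples follow the generator-based context-manager pattern: the code before the yield runs on entry, the code after it on exit. In the upstream baselines code such a function is wrapped with contextlib.contextmanager; below is a minimal self-contained sketch of that usage, with colorize (which lives in baselines.common and is not shown here) replaced by a plain print so the snippet runs on its own.

import time
from contextlib import contextmanager

@contextmanager
def timed(msg):
    # Entry: announce the task and record the start time.
    print(msg)
    tstart = time.time()
    yield
    # Exit: report the elapsed wall-clock time.
    print("done in %.3f seconds" % (time.time() - tstart))

# The body of the with-block is what gets timed.
with timed("sampling trajectories"):
    time.sleep(0.1)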
Example no. 2
def timed(msg):
    print(colorize(msg, color="red"))
    tstart = time.time()
    yield
    print(
        colorize(msg + " done in %.3f seconds" % (time.time() - tstart),
                 color="red"))
Example no. 3
def timed(msg):
    if rank == 0:
        print(colorize(msg, color='magenta'))
        tstart = time.time()
        yield
        print(colorize("done in %.3f seconds" % (time.time() - tstart), color='magenta'))
    else:
        yield
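Examples 3, 4, 6 and 7 gate the printing on rank so that only the root process of an MPI job reports timings. A hedged sketch of how that rank is typically obtained, with a fallback to single-process behaviour when mpi4py is not available (the same pattern appears in the AgentModel constructor further below):

try:
    from mpi4py import MPI
    rank = MPI.COMM_WORLD.Get_rank()  # 0 on the root process
except ImportError:
    # No MPI installed: behave like a single-process run.
    rank = 0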
Example no. 4
def timed(self, msg):
    if self.rank == 0:
        print(colorize(msg, color='magenta'))
        tstart = time.time()
        yield
        print(colorize("done in %.3f seconds" % (time.time() - tstart), color='magenta'))
    else:
        yield
Example no. 5
def timed(msg, verbose=True):
    if verbose: print(colorize(msg, color='magenta'))
    tstart = time.time()
    yield
    if verbose:
        print(
            colorize("done in %.3f seconds" % (time.time() - tstart),
                     color='magenta'))
Example no. 6
def timed(msg):
    if rank == 0:
        print(colorize(msg, color='yellow'))
        tstart = time.time()
        yield
        print(
            colorize("done in %.3f seconds" % (time.time() - tstart),
                     color='blue'))
    else:
        yield
Example no. 7
def timed(msg):
    if rank == 0:
        print(colorize(msg, color='magenta'))
        start_time = time.time()
        yield
        print(
            colorize("done in %.3f seconds" % (time.time() - start_time),
                     color='magenta'))
    else:
        yield
Example no. 8
def timed(self, msg):
    if self._is_chef:
        logger.info(colorize(msg, color='magenta'))
        tstart = time.time()
        yield
        logger.info(
            colorize("done in %.3f seconds" % (time.time() - tstart),
                     color='magenta'))
    else:
        yield
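Example no. 8 applies the same pattern as a method: the rank check is stored on the instance as _is_chef and output goes through a logger instead of print. A sketch of how such a class might be wired, under the assumption that the flag is derived from the MPI rank as shown above (the class name and constructor argument here are illustrative, not from the original source):

import time
import logging
from contextlib import contextmanager

logger = logging.getLogger(__name__)

class Worker:
    def __init__(self, rank):
        # Only the "chef" (rank-0) process emits timing logs.
        self._is_chef = (rank == 0)

    @contextmanager
    def timed(self, msg):
        if self._is_chef:
            logger.info(msg)
            tstart = time.time()
            yield
            logger.info("done in %.3f seconds" % (time.time() - tstart))
        else:
            yield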
Example no. 9
elif args.command == 'view':
    from baselines.common.sacred_utils import load_runs, filter_runs
    from baselines.common import colorize
    assert args.name is not None, "Provide an experiment name."
    assert args.dir is not None, "Provide a directory for experiment."
    rule = re.compile(args.name + '_*')
    # Get all screens
    all_active_screens = 0
    for s in list_screens():
        if rule.match(s.name):
            all_active_screens += 1
    # Load runs to get active ones
    runs = load_runs(args.dir)
    running_runs = filter_runs({'run.status': 'RUNNING'}, runs)
    print(colorize("==========================================", color='red'))
    max_eta, max_duration = None, None
    for key in running_runs.keys():
        run = running_runs[key]
        print(colorize('Run:', color='blue'),
              "{0} ({1})".format(key, run['config']['env']))
        print("\t" + colorize("Steps:", color='blue') + "{0}/{1}".format(
            len(run['metrics']['EpRewMean']['steps']) +
            1, run['config']['max_iters']) + "\t\t" +
              colorize("Reward:", color='blue') +
              "{0}".format(run['metrics']['EpRewMean']['values'][-1]))
        completion = (len(run['metrics']['EpRewMean']['steps']) +
                      1) / run['config']['max_iters']
        start_time = datetime.strptime(run['run']['start_time'],
                                       '%Y-%m-%dT%H:%M:%S.%f')
        duration = datetime.utcnow() - start_time
Example no. 10
elif args.command == 'view':
    from baselines.common.sacred_utils import load_runs, filter_runs
    from baselines.common import colorize
    assert args.name is not None, "Provide an experiment name."
    assert args.dir is not None, "Provide a directory for experiment."
    rule = re.compile(args.name + '_*')
    # Get all screens
    all_active_screens = 0
    for s in list_screens():
        if rule.match(s.name):
            all_active_screens += 1
    # Load runs to get active ones
    runs = load_runs(args.dir)
    running_runs = filter_runs({'run.status': 'RUNNING'}, runs)
    print(colorize("==========================================", color='red'))
    max_eta, max_duration = None, None
    for key in running_runs.keys():
        run = running_runs[key]
        print(colorize('Run:', color='blue'), "{0} ({1})".format(key, run['config']['env']))
        print("\t" + colorize("Steps:", color='blue') +
                "{0}/{1}".format(len(run['metrics']['EpRewMean']['steps'])+1, run['config']['max_iters']) +
                "\t\t" + colorize("Reward:", color='blue') + "{0}".format(run['metrics']['EpRewMean']['values'][-1]) +
                "\t\t" + colorize("Seed:", color='blue') + "{0}".format(run['config']['seed']) +
                "\t\t" + colorize("Delta:", color='blue') + "{0}".format(run['config']['delta']))
        completion = (len(run['metrics']['EpRewMean']['steps'])+1) / run['config']['max_iters']
        start_time = datetime.strptime(run['run']['start_time'], '%Y-%m-%dT%H:%M:%S.%f')
        duration = datetime.utcnow() - start_time
        eta = duration * (1 - completion) / completion
        max_eta = max(eta, max_eta) if max_eta is not None else eta
        max_duration = max(duration, max_duration) if max_duration is not None else duration
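The ETA in this snippet is a simple proportional estimate: if a run is completion fraction done after duration wall-clock time, the remaining time is duration * (1 - completion) / completion. A small worked example with hypothetical numbers (Python's timedelta supports multiplication and division by floats, which is what the code above relies on):

from datetime import timedelta

# Hypothetical: 25% of the iterations done after 10 minutes.
duration = timedelta(minutes=10)
completion = 0.25
eta = duration * (1 - completion) / completion
print(eta)  # 0:30:00 -> about 30 minutes still to go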
Example no. 11
def best_of_grid(policy, grid_size_1d, mu_min, mu_max, grid_dimension,
                 trainable_std, rho_init, old_rhos_list,
                 iters_so_far, mask_iters,
                 set_parameters, set_parameters_old,
                 delta_cst, renyi_components_sum,
                 evaluate_behav, den_mise,
                 evaluate_behav_last_sample,
                 evaluate_bound, evaluate_renyi, evaluate_roba,
                 filename, plot_bound, plot_ess_profile, delta_t, new_grid):

    # Compute MISE's denominator and Renyi bound
    # evaluate the last behav over all samples and add to the denominator
    set_parameters_old(old_rhos_list[-1])
    behav_t = evaluate_behav()
    den_mise = (den_mise + np.exp(behav_t)) * mask_iters
    # print(den_mise)
    for i in range(len(old_rhos_list) - 1):
        # evaluate all the behavs (except the last) over the last sample
        set_parameters_old(old_rhos_list[i])
        behav = evaluate_behav_last_sample()
        # print('behhaaaaavvvv', np.exp(behav))
        den_mise[iters_so_far-1] = den_mise[iters_so_far-1] + np.exp(behav)

    # Compute the log of MISE's denominator
    eps = 1e-24  # to avoid inf weights and nan bound
    den_mise_it = (den_mise + eps) / iters_so_far
    den_mise_log = np.log(den_mise_it) * mask_iters

    # Calculate the grid of parameters to evaluate
    rho_grid, gain_grid, xyz = \
        generate_grid(grid_size_1d, grid_dimension, trainable_std,
                      mu_min=mu_min, mu_max=mu_max)
    logger.record_tabular("GridSize", len(rho_grid))

    # Evaluate the set of parameters and retain the best one
    bound = []
    mise = []
    bonus = []
    ess_d2 = []
    ess_miw = []
    bound_best = 0
    renyi_bound_best = 0
    # print('rho_grid', rho_grid)
    if new_grid and delta_t == 'continuous':
        print(colorize('computing renyi bound from scratch', color='magenta'))
    for i, rho in enumerate(rho_grid):
        set_parameters(rho)
        if new_grid and delta_t == 'continuous':
            for old_rho in old_rhos_list:
                set_parameters_old(old_rho)
                renyi_component = evaluate_renyi()
                renyi_components_sum[i] += 1 / renyi_component
            renyi_bound = 1 / renyi_components_sum[i]
        else:
            set_parameters_old(old_rhos_list[-1])
            renyi_component = evaluate_renyi()
            renyi_components_sum[i] += 1 / renyi_component
            renyi_bound = 1 / renyi_components_sum[i]

        bound_rho = evaluate_bound(den_mise_log, renyi_bound)
        bound.append(bound_rho)
        if bound_rho > bound_best:
            bound_best = bound_rho
            rho_best = rho
            renyi_bound_best = renyi_bound
        if plot_bound == 1:
            # Evaluate bounds' components for plotting
            mise_rho, bonus_rho, ess_d2_rho, ess_miw_rho = \
                evaluate_roba(den_mise_log, renyi_bound)
            mise.append(mise_rho)
            bonus.append(bonus_rho)
            ess_d2.append(ess_d2_rho)
            ess_miw.append(ess_miw_rho)

    # Calculate improvement
    # set_parameters(rho_init)
    # improvement = bound_best - evaluate_bound(den_mise_log, renyi_bound)
    improvement = 0

    # Plot the profile of the bound and its components
    if plot_bound == 2:
        bound = np.array(bound).reshape((grid_size_1d, grid_size_1d))
        # mise = np.array(mise).reshape((grid_size_std, grid_size))
        plot3D_bound_profile(xyz[0], xyz[1], bound, rho_best,
                             bound_best, iters_so_far, filename)
    elif plot_bound == 1:
        plot_bound_profile(gain_grid[0], bound, mise, bonus, rho_best[0],
                           bound_best, iters_so_far, filename)
        # plot_ess(gain_grid, ess_d2, iters_so_far, 'd2_' + filename)
        # plot_ess(gain_grid, ess_miw, iters_so_far, 'miw_' + filename)

    return rho_best, improvement, den_mise_log, den_mise, \
        renyi_components_sum, renyi_bound_best
Example no. 12

    def __init__(self, agent, network, nsteps, rho, max_kl, ent_coef,
                 vf_stepsize, vf_iters, cg_damping, cg_iters, seed, load_path,
                 **network_kwargs):
        super(AgentModel, self).__init__(name='MATRPOModel')
        self.agent = agent
        self.nsteps = nsteps
        self.rho = rho
        self.max_kl = max_kl
        self.ent_coef = ent_coef
        self.cg_damping = cg_damping
        self.cg_iters = cg_iters
        self.vf_stepsize = vf_stepsize
        self.vf_iters = vf_iters

        set_global_seeds(seed)

        np.set_printoptions(precision=3)

        if MPI is not None:
            self.nworkers = MPI.COMM_WORLD.Get_size()
            self.rank = MPI.COMM_WORLD.Get_rank()
        else:
            self.nworkers = 1
            self.rank = 0

        # Setup losses and stuff
        # ----------------------------------------
        ob_space = agent.observation_space
        ac_space = agent.action_space

        with tf.name_scope(agent.name):
            if isinstance(network, str):
                network = get_network_builder(network)(**network_kwargs)
            with tf.name_scope("pi"):
                pi_policy_network = network(ob_space.shape)
                pi_value_network = network(ob_space.shape)
                self.pi = pi = PolicyWithValue(ac_space, pi_policy_network,
                                               pi_value_network)
            with tf.name_scope("oldpi"):
                old_pi_policy_network = network(ob_space.shape)
                old_pi_value_network = network(ob_space.shape)
                self.oldpi = oldpi = PolicyWithValue(ac_space,
                                                     old_pi_policy_network,
                                                     old_pi_value_network)

        self.comm_matrix = agent.comm_matrix.copy()
        self.estimates = np.zeros([agent.nmates, nsteps], dtype=np.float32)
        self.multipliers = np.ones([self.agent.nmates,
                                    self.nsteps]).astype(np.float32)

        pi_var_list = pi_policy_network.trainable_variables + list(
            pi.pdtype.trainable_variables)
        old_pi_var_list = old_pi_policy_network.trainable_variables + list(
            oldpi.pdtype.trainable_variables)
        vf_var_list = pi_value_network.trainable_variables + pi.value_fc.trainable_variables
        old_vf_var_list = old_pi_value_network.trainable_variables + oldpi.value_fc.trainable_variables

        self.pi_var_list = pi_var_list
        self.old_pi_var_list = old_pi_var_list
        self.vf_var_list = vf_var_list
        self.old_vf_var_list = old_vf_var_list

        if load_path is not None:
            load_path = osp.expanduser(load_path)
            ckpt = tf.train.Checkpoint(model=pi)
            load_path = load_path + '/agent_{}'.format(self.agent.id)
            manager = tf.train.CheckpointManager(ckpt,
                                                 load_path,
                                                 max_to_keep=None)
            ckpt.restore(manager.latest_checkpoint)
            print(
                colorize('Agent{}\'s Model restored!'.format(self.agent.id),
                         color='magenta'))

        self.vfadam = MpiAdam(vf_var_list)

        self.get_flat = U.GetFlat(pi_var_list)
        self.set_from_flat = U.SetFromFlat(pi_var_list)
        self.loss_names = [
            "Lagrange", "surrgain", "sync", "meankl", "entloss", "entropy"
        ]
        self.shapes = [var.get_shape().as_list() for var in pi_var_list]
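The load_path branch above uses TensorFlow 2's object-based checkpointing (tf.train.Checkpoint plus tf.train.CheckpointManager) to restore the policy before training. A minimal standalone sketch of that pattern; the model and directory below are placeholders, not the actual MATRPO objects:

import tensorflow as tf

model = tf.keras.Sequential([tf.keras.layers.Dense(4)])
ckpt = tf.train.Checkpoint(model=model)
manager = tf.train.CheckpointManager(ckpt, './checkpoints/agent_0', max_to_keep=None)

# Restore the most recent checkpoint if one exists; otherwise start from scratch.
ckpt.restore(manager.latest_checkpoint)
if manager.latest_checkpoint:
    print("Model restored from", manager.latest_checkpoint)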