Python CommRolloutWorker Examples

Programming Language: Python

Namespace/Package Name: common.rollout

Examples at hotexamples.com: 5

Python CommRolloutWorker - 5 examples found. These are the top-rated real-world Python examples of common.rollout.CommRolloutWorker, extracted from open source projects. You can rate examples to help us improve their quality.

Frequently Used Methods

Show Hide

CommRolloutWorker(5)

Frequently Used Methods

CommRolloutWorker (5)

Example #1

Show file

File: runner.py Project: code-cultivater/sota_marl_alg

    def __init__(self, env, args):
        """Build the agents, rollout workers, replay buffer and output directory.

        Args:
            env: Training environment; handed to the training rollout worker.
            args: Run configuration. This method reads ``alg``, ``result_dir``
                and ``map`` from it and passes the whole object on to the
                agents, workers and buffer.
        """
        self.env = env

        # Separate environment used only by the evaluation worker. The
        # StarCraft2Env setup that used to be built here was configured with
        # reward_sparse=True (original note: win = 1, loss = -1, every other
        # ordinary step = 0); whether MeetEnv scores episodes the same way is
        # not visible from this method.
        self.env_evaluate = MeetEnv()

        if 'commnet' in args.alg or 'g2anet' in args.alg:  # communication agent
            self.agents = CommAgents(args)
            self.rolloutWorker = CommRolloutWorker(env, self.agents, args)
            self.evaluateWorker = CommRolloutWorker(self.env_evaluate, self.agents, args)
        else:  # no communication agent
            self.agents = Agents(args)
            self.rolloutWorker = RolloutWorker(env, self.agents, args)
            self.evaluateWorker = RolloutWorker(self.env_evaluate, self.agents, args)

        # coma, central_v and reinforce are on-policy, so no replay buffer is
        # created for them (self.buffer is left undefined in that case).
        if not any(tag in args.alg for tag in ('coma', 'central_v', 'reinforce')):
            self.buffer = ReplayBuffer(args)
        self.args = args

        # Directory used to save the plt and pkl outputs.
        self.save_path = self.args.result_dir + '/' + args.alg + '/' + args.map
        # exist_ok avoids the check-then-create race when several runs start
        # at once and target the same directory.
        os.makedirs(self.save_path, exist_ok=True)

Example #2

Show file

File: runner.py Project: Locke637/vdn_magents

    def __init__(self, env, args):
        """Build the agents, rollout worker, replay buffer and output paths.

        Args:
            env: Environment handed to the rollout worker.
            args: Run configuration. This method reads ``alg``, ``learn``,
                ``use_per``, ``result_dir``, ``map``, ``env_name``,
                ``n_agents``, ``map_size`` and ``name_time`` from it.
        """
        self.env = env

        if 'commnet' in args.alg or 'g2anet' in args.alg:  # communication agent
            self.agents = CommAgents(args)
            self.rolloutWorker = CommRolloutWorker(env, self.agents, args)
        else:  # no communication agent
            self.agents = Agents(args)
            self.rolloutWorker = RolloutWorker(env, self.agents, args)

        # coma, central_v and reinforce are on-policy, so a replay buffer is
        # only created when learning with one of the other algorithms
        # (self.buffer is left undefined otherwise).
        if args.learn and not any(
                tag in args.alg for tag in ('coma', 'central_v', 'reinforce')):
            if args.use_per:
                self.buffer = PrioritizedReplayBuffer(args)
            else:
                self.buffer = ReplayBuffer(args)
        self.args = args
        self.win_rates = []
        self.episode_rewards = []

        # Directory used to save the plt and pkl outputs.
        self.save_path = self.args.result_dir + '/' + args.map + '/'
        # exist_ok avoids the check-then-create race when several runs start
        # at once and target the same directory.
        os.makedirs(self.save_path, exist_ok=True)
        self.file_name = (self.save_path + str(args.env_name) + '_'
                          + str(args.n_agents) + '_' + str(args.map_size)
                          + '_' + args.name_time)

Example #3

Show file

File: runner.py Project: joosephook/StarCraftIICurriculum

    def __init__(self, curriculum, args, target_env):
        """Build the agents, rollout worker and bookkeeping state.

        Args:
            curriculum: Stored as ``self.curriculum``; not otherwise used here.
            args: Run configuration. This method reads ``alg``, ``evaluate``
                and ``save_path`` from it.
            target_env: Stored as ``self.target_env``; not otherwise used here.
        """
        self.target_env = target_env
        self.curriculum = curriculum

        # The rollout worker is created without an environment (None is
        # passed in its place).
        if 'commnet' in args.alg or 'g2anet' in args.alg:  # communication agent
            self.agents = CommAgents(args)
            self.rolloutWorker = CommRolloutWorker(None, self.agents, args)
        else:  # no communication agent
            self.agents = Agents(args)
            self.rolloutWorker = RolloutWorker(None, self.agents, args)

        # coma, central_v and reinforce are on-policy. For the other
        # algorithms, outside evaluation mode, the buffer attribute is only
        # initialised to None here; no buffer object is created in this method.
        if not args.evaluate and not any(
                tag in args.alg for tag in ('coma', 'central_v', 'reinforce')):
            self.buffer = None
        self.args = args
        self.win_rates = []
        self.eval_episode_rewards = []

        # Directory used to save the plt and pkl outputs.
        self.save_path = args.save_path
        # exist_ok avoids the check-then-create race when several runs start
        # at once and target the same directory.
        os.makedirs(self.save_path, exist_ok=True)

        self.train_rewards = []
        self.ratios = []
        self.historical_params = {}
        self.switch = True  # we will be switching to some task
        self.patience = 20
        self.writer: SummaryWriter = None
        self.eval_envs = None
        self.debug = False

Example #4

Show file

File: runner_mcsac.py Project: puyuan1996/MARL

    def __init__(self, env, args):
        """Build the agents, rollout worker, replay buffer and output directory.

        Args:
            env: Environment handed to the rollout worker.
            args: Run configuration. This method reads ``alg``, ``learn``,
                ``result_dir``, ``map`` and the buffer/training hyperparameters
                that are embedded in the output directory name.
        """
        self.env = env
        self.args = args

        if 'commnet' in args.alg or 'g2anet' in args.alg:  # communication agent
            self.agents = CommAgents(args)
            self.rolloutWorker = CommRolloutWorker(env, self.agents, args)
        else:  # no communication agent
            self.agents = Agents(args)
            # The QMIX_PG learner is only created on this branch.
            self.qmix_pg_learner = QMIX_PG(self.agents, args)
            self.rolloutWorker = RolloutWorker(env, self.agents, args)

        # coma, central_v and reinforce are on-policy, so a replay buffer is
        # only created when learning with one of the other algorithms.
        if args.learn and not any(
                tag in args.alg for tag in ('coma', 'central_v', 'reinforce')):
            self.actor_critic_buffer = ReplayBuffer(args, args.buffer_size)
        self.win_rates = []
        self.episode_rewards = []

        # Directory used to save the plt and pkl outputs. The hyperparameters
        # are encoded in the directory name.
        run_tag = (
            'clamp2-5_rewardscale10_'
            f'{args.buffer_size}_{args.actor_buffer_size}_{args.critic_buffer_size}_'
            f'{args.actor_train_steps}_{args.critic_train_steps}_'
            f'{args.actor_update_delay}_{args.critic_lr}_{args.n_epoch}_{args.temp}'
        )
        self.save_path = (self.args.result_dir + '/linear_mix/mcsac/'
                          + run_tag + '/' + args.map)

        # exist_ok avoids the check-then-create race when several runs start
        # at once and target the same directory.
        os.makedirs(self.save_path, exist_ok=True)

Example #5

Show file

File: runner.py Project: SJTUwbl/G2ANET

    def __init__(self, env, args):
        """Build the agents, rollout worker, replay buffer and output directory.

        Args:
            env: Environment handed to the rollout worker.
            args: Run configuration. This method reads ``alg``, ``learn``,
                ``result_dir`` and ``env_name`` from it.
        """
        self.env = env

        if 'commnet' in args.alg or 'g2anet' in args.alg:  # communication agent
            self.agents = CommAgents(args)
            self.rolloutWorker = CommRolloutWorker(env, self.agents, args)
        else:  # no communication agent
            self.agents = Agents(args)
            self.rolloutWorker = RolloutWorker(env, self.agents, args)

        # coma, central_v and reinforce are on-policy, so a replay buffer is
        # only created when learning with one of the other algorithms
        # (self.buffer is left undefined otherwise).
        if args.learn and not any(
                tag in args.alg for tag in ('coma', 'central_v', 'reinforce')):
            self.buffer = ReplayBuffer(args)
        self.args = args
        self.plt_success = []
        self.episode_rewards = []

        # Directory used to save the plt and pkl outputs.
        self.save_path = self.args.result_dir + '/' + args.alg + '/' + args.env_name
        # exist_ok avoids the check-then-create race when several runs start
        # at once and target the same directory.
        os.makedirs(self.save_path, exist_ok=True)