Exemple #1
0
    def buffer(self):
        """Return the buffer currently displayed in this window.

        Returns:
            Buffer: wrapper around the window's underlying buffer handle.
        """
        raw_handle = symbols.win.get_buffer(self.struct)
        return Buffer(raw_handle)
Exemple #2
0
 def __init__(self, message):
     """Wrap a Vert.x message, converting its body to a Python value."""
     self.java_obj = message
     body = message.body
     if isinstance(body, org.vertx.java.core.json.JsonObject):
         # JSON objects are converted through their Map view.
         converted = map_from_java(body.toMap())
     elif isinstance(body, org.vertx.java.core.buffer.Buffer):
         # Raw buffers get the Python Buffer wrapper.
         converted = Buffer(body)
     else:
         converted = map_from_java(body)
     self.body = converted
    def __init__(self, args):
        """Set up per-rover TD3 agents, replay buffers, shared model buckets,
        and the train/test rollout worker processes.

        Args:
            args: hyper-parameter namespace; must expose num_rover and
                action_dim. NUM_WORKERS and OU_handle come from module scope.

        Side effects: spawns NUM_WORKERS training processes plus one test
        process, all started before this constructor returns.
        """
        self.args = args
        self.agents = [pg.TD3(args) for _ in range(self.args.num_rover)]

        #Load to GPU
        for ag in self.agents:
            ag.to_cuda()

        ###### Buffer is agent's own data self generated via its rollouts #########
        self.buffers = [Buffer() for _ in range(self.args.num_rover)]
        # One noise generator per rollout worker (exploration noise).
        self.noise_gen = OU_handle.get_list_generators(NUM_WORKERS,
                                                       args.action_dim)

        ######### Multiprocessing TOOLS #########
        self.manager = Manager()
        self.data_bucket = [
            self.manager.list() for _ in range(args.num_rover)
        ]  #Experience list stores experiences from all processes

        ######### TRAIN ROLLOUTS WITH ACTION NOISE ############
        self.models_bucket = self.manager.list()
        model_template = models.Actor(args)
        for _ in range(self.args.num_rover):
            self.models_bucket.append(models.Actor(args))
        # Pipe() yields a connection pair; by convention here workers take
        # index [1] of task pipes and this process reads index [0] of results.
        self.task_pipes = [Pipe() for _ in range(NUM_WORKERS)]
        self.result_pipes = [Pipe() for _ in range(NUM_WORKERS)]
        self.train_workers = [
            Process(target=rollout_worker,
                    args=(self.args, i, self.task_pipes[i][1],
                          self.result_pipes[i][0], self.noise_gen[i],
                          self.data_bucket, self.models_bucket,
                          model_template)) for i in range(NUM_WORKERS)
        ]
        for worker in self.train_workers:
            worker.start()

        ######## TEST ROLLOUT POLICY ############
        # Test rollouts run without exploration noise (noise generator = None).
        self.test_task_pipe = Pipe()
        self.test_result_pipe = Pipe()
        self.test_worker = Process(target=rollout_worker,
                                   args=(self.args, 0, self.test_task_pipe[1],
                                         self.test_result_pipe[0], None,
                                         self.data_bucket, self.models_bucket,
                                         model_template))
        self.test_worker.start()

        #### STATS AND TRACKING WHICH ROLLOUT IS DONE ######
        self.best_policy = models.Actor(args)  #Best policy found by PF yet
        self.best_score = -999
        self.test_score = None
        self.test_eval_flag = True
        self.rollout_scores = [None for _ in range(NUM_WORKERS)]
        self.best_rollout_score = -999
        self.train_eval_flag = [True for _ in range(NUM_WORKERS)]
        self.update_budget = 0
Exemple #4
0
    def __init__(
        self,
        CERL_agent,
        num_workers,
        trainers,
        pomdp_adv=False
    ):  #trainers first is the blue agent and second is the red model
        """Build red-agent rollout infrastructure around an existing CERL agent.

        Args:
            CERL_agent: agent whose `args` supply all hyper-parameters.
            num_workers: number of rollout worker processes to spawn.
            trainers: [blue agent, red model]; red is nulled out under pomdp_adv.
            pomdp_adv: if True, run the POMDP-adversary setting (no red model).

        Side effects: spawns and starts `num_workers` rollout processes.
        """
        self.num_workers = num_workers
        self.trainers = trainers
        self.pomdp_adv = pomdp_adv
        self.args = CERL_agent.args
        self.drqn = CERL_agent.args.drqn  #denote if blue uses drqn
        if self.pomdp_adv:
            self.trainers = [trainers[0],
                             None]  #make sure the red model is never used
        self.buffer_gpu = CERL_agent.args.buffer_gpu
        self.batch_size = CERL_agent.args.batch_size
        self.algo = CERL_agent.args.algo
        self.state_dim = CERL_agent.args.state_dim
        self.action_dim = CERL_agent.args.action_dim
        self.buffer = Buffer(BUFFER_SIZE,
                             self.buffer_gpu)  #initialize own replay buffer
        self.data_bucket = self.buffer.tuples
        self.evo_task_pipes = [Pipe() for _ in range(self.num_workers)]
        self.evo_result_pipes = [Pipe() for _ in range(self.num_workers)]
        # share_memory() lets every worker process see weight updates in place.
        self.actual_red_worker = Actor(
            CERL_agent.args.state_dim, CERL_agent.args.action_dim, -1,
            'dis')  #this model is shared accross the workers
        self.actual_red_worker.share_memory()
        self.td3args = {
            'policy_noise': 0.2,
            'policy_noise_clip': 0.5,
            'policy_ups_freq': 2,
            'action_low': CERL_agent.args.action_low,
            'action_high': CERL_agent.args.action_high,
            'cerl_args': self.args
        }
        self.renew_learner(
        )  #now we are not using new learner for each iteration
        # Every slot aliases the same shared model, not copies.
        self.rollout_bucket = [
            self.actual_red_worker for i in range(num_workers)
        ]
        self.workers = [
            Process(target=rollout_worker,
                    args=(id, 3, self.evo_task_pipes[id][1],
                          self.evo_result_pipes[id][0], False,
                          self.data_bucket, self.rollout_bucket, 'dummy_name',
                          None, 'dis', self.trainers, False, self.pomdp_adv))
            for id in range(num_workers)
        ]

        for worker in self.workers:
            worker.start()
        self.evo_flag = [True for _ in range(self.num_workers)]
Exemple #5
0
    def each(self, func):
        """Call the func for every element of the set.

        Keyword arguments:
        @param func: The function to call; raw Vert.x buffers are wrapped in
            the Python Buffer type before being passed to it.
        """
        # Renamed from 'iter' to avoid shadowing the builtin iter().
        iterator = self.java_obj.iterator()
        while iterator.hasNext():
            obj = iterator.next()
            # Hand callers the Python Buffer wrapper, not the raw Java object.
            if isinstance(obj, org.vertx.java.core.buffer.Buffer):
                obj = Buffer(obj)
            func(obj)
	def __init__(self, wwid, algo_name, state_dim, action_dim, actor_lr, critic_lr, gamma, tau, init_w = True, **td3args):
		"""Create a learner wrapping an off-policy algorithm plus its own replay buffer.

		td3args must contain 'cerl_args' (experiment-wide args namespace).
		"""
		# NOTE(review): `self.id = id` binds the BUILTIN id() function, not a
		# parameter — this looks unintended; confirm what id should hold.
		self.td3args = td3args; self.id = id
		self.wwid = wwid
		self.algo = Off_Policy_Algo(wwid, algo_name, state_dim, action_dim, actor_lr, critic_lr, gamma, tau, init_w)
		self.args = td3args['cerl_args']

		#LEARNER STATISTICS
		self.fitnesses = []
		self.ep_lens = []
		self.value = None
		self.visit_count = 0
		# 1M-transition private buffer; GPU placement follows experiment args.
		self.private_replay_buffer = Buffer(1000000, self.args.buffer_gpu)   #
Exemple #7
0
    def _init(self, bufptr):
        """Instantiate the wrapped class, register it, and return a handle.

        The instance's id() doubles as the opaque handle handed back to the
        caller; if the instance defines init() and it returns an int, that
        int (a status code) is returned instead.
        """
        instance = self.instclass()

        # Keep a strong reference so the instance outlives this call.
        self.instances[id(instance)] = instance

        if hasattr(instance, "init"):
            status = instance.init(Buffer(bufptr))
            if isinstance(status, int):
                return status

        return id(instance)
Exemple #8
0
	def __init__(self, args, model_constructor, env_constructor):
		"""Set up a SAC learner, replay buffer, and rollout/test worker processes.

		Args:
			args: hyper-parameter namespace (rollout_size, buffer_size, gamma, autotune).
			model_constructor: factory exposing make_model(policy_name).
			env_constructor: exposes dummy_env.test_size for the test workers.

		Side effects: spawns args.rollout_size learner workers and
		dummy_env.test_size test workers, all started here.
		"""
		self.args = args

		#MP TOOLS
		self.manager = Manager()

		#Algo
		sac_keyargs = {}
		sac_keyargs['autotune'] = args.autotune
		sac_keyargs['entropy'] = True
		self.algo = SAC(args, model_constructor, args.gamma, **sac_keyargs)

		# #Save best policy
		# self.best_policy = model_constructor.make_model('actor')

		#Init BUFFER
		self.replay_buffer = Buffer(args.buffer_size)
		self.data_bucket = self.replay_buffer.tuples

		#Initialize Rollout Bucket
		self.rollout_bucket = self.manager.list()
		self.rollout_bucket.append(model_constructor.make_model('Gaussian_FF'))

		############## MULTIPROCESSING TOOLS ###################
		#Learner rollout workers
		self.task_pipes = [Pipe() for _ in range(args.rollout_size)]
		self.result_pipes = [Pipe() for _ in range(args.rollout_size)]
		self.workers = [Process(target=rollout_worker, args=(id, 'pg', self.task_pipes[id][1], self.result_pipes[id][0], self.data_bucket, self.rollout_bucket, env_constructor)) for id in range(args.rollout_size)]
		for worker in self.workers: worker.start()
		self.roll_flag = [True for _ in range(args.rollout_size)]

		#Test bucket
		self.test_bucket = self.manager.list()
		self.test_bucket.append(model_constructor.make_model('Gaussian_FF'))

		#5 Test workers
		self.test_task_pipes = [Pipe() for _ in range(env_constructor.dummy_env.test_size)]
		self.test_result_pipes = [Pipe() for _ in range(env_constructor.dummy_env.test_size)]
		self.test_workers = [Process(target=rollout_worker, args=(id, 'test', self.test_task_pipes[id][1], self.test_result_pipes[id][0], None, self.test_bucket, env_constructor)) for id in range(env_constructor.dummy_env.test_size)]
		for worker in self.test_workers: worker.start()
		self.test_flag = False

		#Trackers
		self.best_score = 0.0; self.gen_frames = 0; self.total_frames = 0; self.test_score = None; self.test_std = None; self.test_trace = []; self.rollout_fits_trace = []

		self.ep_len = 0
		self.r1_reward = 0
		self.num_footsteps = 0
		self.best_shaped_score = 0.0
Exemple #9
0
async def test_get_frame_complex():
    """Smoke-test Buffer.get_frame_complex over the trailing time window,
    printing selected ETH order-book columns."""
    buffer = Buffer()
    last_time = await buffer.get_last_time()

    # Derive the start time as a function of the
    # end time minus the window size
    # (window_size is presumably in minutes, hence * 60 — confirm)
    start = round(last_time - (int(DC.window_size) * 60))

    # NOTE(review): 'assets' is never used below — the await may be kept for
    # side effects or be leftover; confirm before removing.
    assets = await buffer.get_random_assets(DC.asset_num)

    # Hard-coded ['ETH'] ignores the random assets drawn above.
    f = await buffer.get_frame_complex(start, last_time, ['ETH'])

    print(f[[
        'ask_price_0', 'ask_quantity_0', 'ask_price_1', 'ask_quantity_1',
        'close'
    ]].to_string())
Exemple #10
0
def map_from_vertx(value):
    """Converts a Vert.x type to a Jython type.

    Unrecognized values (and None) are returned unchanged. The first matching
    Java type wins, in the same priority order as the original chain.
    """
    if value is None:
        return value
    converters = (
        (Map, map_map_from_java),
        (Set, map_set_from_java),
        (Collection, map_collection_from_java),
        (org.vertx.java.core.json.JsonObject, map_object_from_java),
        (org.vertx.java.core.json.JsonArray, map_array_from_java),
        (org.vertx.java.core.buffer.Buffer, Buffer),
    )
    for java_type, convert in converters:
        if isinstance(value, java_type):
            return convert(value)
    return value
Exemple #11
0
	def __init__(self, args, id):
		"""Initialize a multi-agent learner: evolved population, PG algorithm,
		shared rollout template, and per-agent replay buffers.

		Args:
			args: hyper-parameter namespace (popn_size, state/action dims, etc.).
			id: learner index (NOTE: parameter shadows the builtin id()).
		"""
		self.args = args
		self.id = id

		###Initalize neuroevolution module###
		self.evolver = SSNE(self.args)

		########Initialize population
		self.manager = Manager()
		self.popn = self.manager.list()
		for _ in range(args.popn_size):
			self.popn.append(MultiHeadActor(args.state_dim, args.action_dim, args.hidden_size, args.config.num_agents))
			self.popn[-1].eval()

		#### INITIALIZE PG ALGO #####
		if self.args.is_matd3 or args.is_maddpg:
			algo_name = 'TD3' if self.args.is_matd3 else 'DDPG'
			self.algo = MATD3(id, algo_name, args.state_dim, args.action_dim, args.hidden_size, args.actor_lr,
			                args.critic_lr, args.gamma, args.tau, args.savetag, args.aux_save, 
			                args.use_gpu, args.config.num_agents, args.init_w)

		else:
			self.algo = MultiTD3(id, 'TD3', args.state_dim, args.action_dim, args.hidden_size, args.actor_lr,
			                args.critic_lr, args.gamma, args.tau, args.savetag, args.aux_save,
			                args.use_gpu, args.config.num_agents, args.init_w)


		#### Rollout Actor is a template used for MP #####
		self.rollout_actor = self.manager.list()
		self.rollout_actor.append(MultiHeadActor(args.state_dim, args.action_dim, args.hidden_size, args.config.num_agents))

		#Initalize buffer
		# One CPU-resident buffer per agent in the trunk configuration.
		self.buffer = [Buffer(args.buffer_size, buffer_gpu=False) for _ in range(args.config.num_agents)]

		#Agent metrics
		self.fitnesses = [[] for _ in range(args.popn_size)]

		###Best Policy HOF####
		self.champ_ind = 0
Exemple #12
0
    def __init__(self, logger, redis_host, redis_port, redis_db):
        """Wire up Redis-backed services: pubsub, balancer, cache, per-network
        provider data, filter and buffer."""
        DataBase.__init__(self, logger, redis_host, redis_port, redis_db)

        redis_client = redis.Redis(host=redis_host, port=redis_port,
                                   db=redis_db)
        self.pubsub = pubsub.Pubsub(logger, redis_client)
        self.balancer = balancer.Balancer(logger, self.rc, self.pubsub)
        self.cache = cache.Cache(logger, self.rc)

        # One ProviderData handle per supported social network.
        provider_names = ('facebook', 'twitter', 'tumblr', 'flickr', '500px',
                          'linkedin')
        self.provider = {
            name: provider_data.ProviderData(self.rc, name)
            for name in provider_names
        }
        self.facebook = self.provider['facebook']
        self.twitter = self.provider['twitter']
        self.tumblr = self.provider['tumblr']
        self.flickr = self.provider['flickr']
        self.px500 = self.provider['500px']
        self.linkedin = self.provider['linkedin']
        self.filter = FilterData(self.rc)
        self.buffer = Buffer(logger, self.rc, self.pubsub)
Exemple #13
0
 def buffer(self):
     """Return the buffer this cursor is associated with."""
     raw_handle = symbols.cursor.get_buffer(self.struct)
     return Buffer(raw_handle)
Exemple #14
0
 def handle(self, buffer):
     """Invoke the stored handler once the buffer has been parsed."""
     wrapped = Buffer(buffer)
     self.handler(wrapped)
Exemple #15
0
sys.path.insert(0, parentdir)

import constants.environment as env_const
from core.exceptions import InvalidUsage, RouteNotFound
from core.buffer import Buffer
from core.test_buffer import TestBuffer
from core.registry import Registry

########## App setup ##########
app = Application()
r = app.router

########## Instantiate globals ##########
# Use the real Buffer only in production; every other environment gets
# TestBuffer so local runs do not touch external services.
env_type = os.environ.get('ENV_TYPE')
if env_type == "production":
    buffer = Buffer()
else:
    buffer = TestBuffer()

# Registry is wired to whichever buffer implementation was selected above.
registry = Registry(buffer=buffer)

########## Error handling ##########


def get_required_param(json, param):
    if json is None:
        logger.info("Request is not a valid json")
        raise InvalidUsage("Request is not a valid json")
    value = json.get(param, None)
    if (value is None) or (value == '') or (value == []):
        logger.info("A required request parameter '{}' had value {}".format(
Exemple #16
0
def map_buffer_from_java(obj):
    """Converts a Buffer to a Python Buffer."""
    wrapped = Buffer(obj)
    return wrapped
Exemple #17
0
 def converter(buffer):
     """Adapt a raw Vert.x buffer into the Python Buffer wrapper."""
     wrapped = Buffer(buffer)
     return wrapped
 def handle(self, buffer):
     """Forward the raw buffer to the stored handler, wrapped as a Buffer."""
     wrapped = Buffer(buffer)
     self.handler(wrapped)
Exemple #19
0
 def data(self):
     """Returns the data as buffer that was received.

     The raw packet payload is wrapped once and memoized in self.buffer.
     """
     if self.buffer is not None:
         return self.buffer
     self.buffer = Buffer(self.packet.data())
     return self.buffer
Exemple #20
0
    def __init__(self, args, model_constructor, env_constructor):
        """Build the CERL-style trainer: evolved population, replay buffer,
        learner portfolio, and evolutionary/PG/test rollout worker processes.

        Args:
            args: hyper-parameter namespace (pop_size, rollout_size, buffer_size,
                portfolio_id, ...).
            model_constructor: factory exposing make_model(policy_string, ...).
            env_constructor: exposes dummy_env.test_size for test workers.

        Side effects: spawns and starts pop_size evo workers, rollout_size
        learner workers, and dummy_env.test_size test workers.
        """
        self.args = args
        self.policy_string = self.compute_policy_type()

        #Evolution
        self.evolver = SSNE(self.args)

        #MP TOOLS
        self.manager = Manager()

        #Genealogy tool
        self.genealogy = Genealogy()

        #Initialize population
        self.population = self.manager.list()
        # Only the first population member is built with seed=True.
        seed = True
        for _ in range(args.pop_size):
            self.population.append(
                model_constructor.make_model(self.policy_string, seed=seed))
            seed = False

        #SEED
        #self.population[0].load_state_dict(torch.load('Results/Auxiliary/_bestcerl_td3_s2019_roll10_pop10_portfolio10'))

        #Save best policy
        self.best_policy = model_constructor.make_model(self.policy_string)

        #Turn off gradients and put in eval mod
        # NOTE(review): `actor = actor.cpu()` rebinds the loop variable; if
        # .cpu() returns a copy, eval() runs on the copy, not the stored
        # proxy-list element — confirm the intended semantics.
        for actor in self.population:
            actor = actor.cpu()
            actor.eval()

        #Init BUFFER
        self.replay_buffer = Buffer(args.buffer_size)
        self.data_bucket = self.replay_buffer.tuples

        #Intialize portfolio of learners
        self.portfolio = []
        self.portfolio = initialize_portfolio(self.portfolio, self.args,
                                              self.genealogy,
                                              args.portfolio_id,
                                              model_constructor)

        #Initialize Rollout Bucket
        # One rollout model slot per learner in the portfolio.
        self.rollout_bucket = self.manager.list()
        for _ in range(len(self.portfolio)):
            self.rollout_bucket.append(
                model_constructor.make_model(self.policy_string))

        ############## MULTIPROCESSING TOOLS ###################

        #Evolutionary population Rollout workers
        self.evo_task_pipes = [Pipe() for _ in range(args.pop_size)]
        self.evo_result_pipes = [Pipe() for _ in range(args.pop_size)]
        self.evo_workers = [
            Process(target=rollout_worker,
                    args=(id, 'evo', self.evo_task_pipes[id][1],
                          self.evo_result_pipes[id][0], self.data_bucket,
                          self.population, env_constructor))
            for id in range(args.pop_size)
        ]
        for worker in self.evo_workers:
            worker.start()
        self.evo_flag = [True for _ in range(args.pop_size)]

        #Learner rollout workers
        self.task_pipes = [Pipe() for _ in range(args.rollout_size)]
        self.result_pipes = [Pipe() for _ in range(args.rollout_size)]
        self.workers = [
            Process(target=rollout_worker,
                    args=(id, 'pg', self.task_pipes[id][1],
                          self.result_pipes[id][0], self.data_bucket,
                          self.rollout_bucket, env_constructor))
            for id in range(args.rollout_size)
        ]
        for worker in self.workers:
            worker.start()
        self.roll_flag = [True for _ in range(args.rollout_size)]

        #Test bucket
        self.test_bucket = self.manager.list()
        self.test_bucket.append(
            model_constructor.make_model(self.policy_string))

        #5 Test workers
        # Test workers get no data bucket (None): they only evaluate.
        self.test_task_pipes = [
            Pipe() for _ in range(env_constructor.dummy_env.test_size)
        ]
        self.test_result_pipes = [
            Pipe() for _ in range(env_constructor.dummy_env.test_size)
        ]
        self.test_workers = [
            Process(target=rollout_worker,
                    args=(id, 'test', self.test_task_pipes[id][1],
                          self.test_result_pipes[id][0], None,
                          self.test_bucket, env_constructor))
            for id in range(env_constructor.dummy_env.test_size)
        ]
        for worker in self.test_workers:
            worker.start()
        self.test_flag = False

        #Meta-learning controller (Resource Distribution)
        self.allocation = [
        ]  #Allocation controls the resource allocation across learners
        for i in range(args.rollout_size):
            self.allocation.append(
                i % len(self.portfolio))  #Start uniformly (equal resources)

        #Trackers
        self.best_score = 0.0
        self.gen_frames = 0
        self.total_frames = 0
        self.test_score = None
        self.test_std = None
        self.best_r1_score = 0.0
        self.ep_len = 0
        self.r1_reward = 0
        self.num_footsteps = 0
        self.test_trace = []
Exemple #21
0
    def __init__(self, args, id):
        """Initialize a multi-agent learner whose network architecture and
        buffer layout depend on the parameter-sharing mode (args.ps).

        'trunk' mode uses a shared MultiHeadActor and one buffer per agent;
        otherwise each agent gets its own Actor and a single shared buffer.

        Args:
            args: hyper-parameter namespace (ps, algo_name, popn_size, ...).
            id: learner index (NOTE: parameter shadows the builtin id()).
        """
        self.args = args
        self.id = id

        ###Initalize neuroevolution module###
        self.evolver = SSNE(self.args)

        ########Initialize population
        self.manager = Manager()
        self.popn = self.manager.list()
        for _ in range(args.popn_size):
            if args.ps == 'trunk':
                self.popn.append(
                    MultiHeadActor(args.state_dim, args.action_dim,
                                   args.hidden_size, args.config.num_agents))

            else:
                # TD3 uses a deterministic policy head; everything else (SAC)
                # gets a Gaussian one.
                if args.algo_name == 'TD3':
                    self.popn.append(
                        Actor(args.state_dim,
                              args.action_dim,
                              args.hidden_size,
                              policy_type='DeterministicPolicy'))
                else:
                    self.popn.append(
                        Actor(args.state_dim,
                              args.action_dim,
                              args.hidden_size,
                              policy_type='GaussianPolicy'))
            self.popn[-1].eval()

        #### INITIALIZE PG ALGO #####
        if args.ps == 'trunk':

            if self.args.is_matd3 or args.is_maddpg:
                algo_name = 'TD3' if self.args.is_matd3 else 'DDPG'
                self.algo = MATD3(id, algo_name, args.state_dim,
                                  args.action_dim, args.hidden_size,
                                  args.actor_lr, args.critic_lr, args.gamma,
                                  args.tau, args.savetag, args.aux_save,
                                  args.actualize, args.use_gpu,
                                  args.config.num_agents, args.init_w)

            else:
                self.algo = MultiTD3(id, args.algo_name, args.state_dim,
                                     args.action_dim, args.hidden_size,
                                     args.actor_lr, args.critic_lr, args.gamma,
                                     args.tau, args.savetag, args.aux_save,
                                     args.actualize, args.use_gpu,
                                     args.config.num_agents, args.init_w)

        else:
            if args.algo_name == 'TD3':
                self.algo = TD3(id, args.algo_name, args.state_dim,
                                args.action_dim, args.hidden_size,
                                args.actor_lr, args.critic_lr, args.gamma,
                                args.tau, args.savetag, args.aux_save,
                                args.actualize, args.use_gpu, args.init_w)
            else:
                self.algo = SAC(id, args.state_dim, args.action_dim,
                                args.hidden_size, args.gamma, args.critic_lr,
                                args.actor_lr, args.tau, args.alpha,
                                args.target_update_interval, args.savetag,
                                args.aux_save, args.actualize, args.use_gpu)

        #### Rollout Actor is a template used for MP #####
        self.rollout_actor = self.manager.list()

        if args.ps == 'trunk':
            self.rollout_actor.append(
                MultiHeadActor(args.state_dim, args.action_dim,
                               args.hidden_size, args.config.num_agents))
        else:
            if args.algo_name == 'TD3':
                self.rollout_actor.append(
                    Actor(args.state_dim,
                          args.action_dim,
                          args.hidden_size,
                          policy_type='DeterministicPolicy'))
            else:
                self.rollout_actor.append(
                    Actor(args.state_dim,
                          args.action_dim,
                          args.hidden_size,
                          policy_type='GaussianPolicy'))

        #Initalize buffer
        # Trunk mode: one buffer per agent; otherwise one shared buffer.
        if args.ps == 'trunk':
            self.buffer = [
                Buffer(args.buffer_size,
                       buffer_gpu=False,
                       filter_c=args.filter_c)
                for _ in range(args.config.num_agents)
            ]
        else:
            self.buffer = Buffer(args.buffer_size,
                                 buffer_gpu=False,
                                 filter_c=args.filter_c)

        #Agent metrics
        self.fitnesses = [[] for _ in range(args.popn_size)]

        ###Best Policy HOF####
        self.champ_ind = 0
Exemple #22
0
    def __init__(self, args, model_constructor, env_constructor):
        """Set up a TD3 learner, replay buffer, and rollout/test worker
        processes with deterministic (non-stochastic) evaluation actors.

        Args:
            args: hyper-parameter namespace (rollout_size, buffer_size, lrs, ...).
            model_constructor: factory exposing make_model('Gaussian_FF').
            env_constructor: exposes dummy_env.test_size for test workers.

        Side effects: spawns args.rollout_size learner workers and
        dummy_env.test_size test workers.
        """
        self.args = args

        #MP TOOLS
        self.manager = Manager()

        #Algo
        # NOTE(review): 'polciy_noise' is misspelled — presumably it matches a
        # (misspelled) keyword in TD3's signature; confirm before renaming.
        self.algo = TD3(model_constructor,
                        actor_lr=args.actor_lr,
                        critic_lr=args.critic_lr,
                        gamma=args.gamma,
                        tau=args.tau,
                        polciy_noise=0.1,
                        policy_noise_clip=0.2,
                        policy_ups_freq=2)

        #Save best policy
        self.best_policy = model_constructor.make_model('Gaussian_FF')
        self.best_policy.stochastic = False

        #Init BUFFER
        self.replay_buffer = Buffer(args.buffer_size)
        self.data_bucket = self.replay_buffer.tuples

        #Initialize Rollout Bucket
        self.rollout_bucket = self.manager.list()
        self.rollout_bucket.append(model_constructor.make_model('Gaussian_FF'))
        # Rollout actors act deterministically and in eval mode.
        for actor in self.rollout_bucket:
            actor.stochastic = False
            actor.eval()

        ############## MULTIPROCESSING TOOLS ###################
        #Learner rollout workers
        self.task_pipes = [Pipe() for _ in range(args.rollout_size)]
        self.result_pipes = [Pipe() for _ in range(args.rollout_size)]
        self.workers = [
            Process(target=rollout_worker,
                    args=(id, 'pg', self.task_pipes[id][1],
                          self.result_pipes[id][0], self.data_bucket,
                          self.rollout_bucket, env_constructor))
            for id in range(args.rollout_size)
        ]
        for worker in self.workers:
            worker.start()
        self.roll_flag = [True for _ in range(args.rollout_size)]

        #Test bucket
        self.test_bucket = self.manager.list()
        self.test_bucket.append(model_constructor.make_model('Gaussian_FF'))
        for actor in self.test_bucket:
            actor.stochastic = False
            actor.eval()

        #5 Test workers
        # Test workers get no data bucket (None): evaluation only.
        self.test_task_pipes = [
            Pipe() for _ in range(env_constructor.dummy_env.test_size)
        ]
        self.test_result_pipes = [
            Pipe() for _ in range(env_constructor.dummy_env.test_size)
        ]
        self.test_workers = [
            Process(target=rollout_worker,
                    args=(id, 'test', self.test_task_pipes[id][1],
                          self.test_result_pipes[id][0], None,
                          self.test_bucket, env_constructor))
            for id in range(env_constructor.dummy_env.test_size)
        ]
        for worker in self.test_workers:
            worker.start()
        self.test_flag = False

        #Trackers
        self.best_score = 0.0
        self.gen_frames = 0
        self.total_frames = 0
        self.test_score = None
        self.test_std = None
        self.test_trace = []
        self.ep_len = 0
        self.r1_reward = 0
        self.num_footsteps = 0
    def __init__(self, args, model_constructor, env_constructor):
        """Build an ERL trainer: evolved population plus a DDQN (discrete) or
        SAC (continuous) PG learner, replay buffer, and worker processes.

        Args:
            args: hyper-parameter namespace (pop_size, rollout_size, num_test, ...).
            model_constructor: factory exposing make_model(policy_string).
            env_constructor: exposes is_discrete to pick the policy/learner.

        Side effects: spawns pop_size evo workers, rollout_size PG workers,
        and num_test test workers.
        """

        self.args = args
        self.policy_string = 'CategoricalPolicy' if env_constructor.is_discrete else 'Gaussian_FF'
        self.manager = Manager()
        self.device = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu")

        #Evolution
        self.evolver = SSNE(self.args)

        #Initialize population
        self.population = self.manager.list()
        for _ in range(args.pop_size):
            self.population.append(
                model_constructor.make_model(self.policy_string))

        #Save best policy
        self.best_policy = model_constructor.make_model(self.policy_string)

        #PG Learner
        # Learner choice mirrors the policy head: DDQN for discrete action
        # spaces, SAC for continuous ones. Imports are deferred so only the
        # needed algorithm module is loaded.
        if env_constructor.is_discrete:
            from algos.ddqn import DDQN
            self.learner = DDQN(args, model_constructor)
        else:
            from algos.sac import SAC
            self.learner = SAC(args, model_constructor)

        #Replay Buffer
        self.replay_buffer = Buffer(args.buffer_size)

        #Initialize Rollout Bucket
        self.rollout_bucket = self.manager.list()
        for _ in range(args.rollout_size):
            self.rollout_bucket.append(
                model_constructor.make_model(self.policy_string))

        ############## MULTIPROCESSING TOOLS ###################
        #Evolutionary population Rollout workers
        self.evo_task_pipes = [Pipe() for _ in range(args.pop_size)]
        self.evo_result_pipes = [Pipe() for _ in range(args.pop_size)]
        # Evo workers store experience only when PG rollouts exist
        # (args.rollout_size > 0 flag).
        self.evo_workers = [
            Process(target=rollout_worker,
                    args=(id, 'evo', self.evo_task_pipes[id][1],
                          self.evo_result_pipes[id][0], args.rollout_size > 0,
                          self.population, env_constructor))
            for id in range(args.pop_size)
        ]
        for worker in self.evo_workers:
            worker.start()
        self.evo_flag = [True for _ in range(args.pop_size)]

        #Learner rollout workers
        self.task_pipes = [Pipe() for _ in range(args.rollout_size)]
        self.result_pipes = [Pipe() for _ in range(args.rollout_size)]
        self.workers = [
            Process(target=rollout_worker,
                    args=(id, 'pg', self.task_pipes[id][1],
                          self.result_pipes[id][0], True, self.rollout_bucket,
                          env_constructor)) for id in range(args.rollout_size)
        ]
        for worker in self.workers:
            worker.start()
        self.roll_flag = [True for _ in range(args.rollout_size)]

        #Test bucket
        self.test_bucket = self.manager.list()
        self.test_bucket.append(
            model_constructor.make_model(self.policy_string))

        # Test workers
        self.test_task_pipes = [Pipe() for _ in range(args.num_test)]
        self.test_result_pipes = [Pipe() for _ in range(args.num_test)]
        self.test_workers = [
            Process(target=rollout_worker,
                    args=(id, 'test', self.test_task_pipes[id][1],
                          self.test_result_pipes[id][0], False,
                          self.test_bucket, env_constructor))
            for id in range(args.num_test)
        ]
        for worker in self.test_workers:
            worker.start()
        self.test_flag = False

        #Trackers
        self.best_score = -float('inf')
        self.gen_frames = 0
        self.total_frames = 0
        self.test_score = None
        self.test_std = None
    def __init__(self,
                 args):  # need to intialize rollout_workers to have blue agent
        self.args = args
        self.evolver = SSNE(
            self.args)  # this evolver implements neuro-evolution

        # MP TOOLS
        self.manager = Manager()

        self.mutate_algos = [
            Mutation_Add(self),
            Mutation_Delete(self),
            Mutation_Exchange(self)
        ]  #store all the mutate algorithm objects
        # Genealogy tool
        self.genealogy = Genealogy()

        # Init BUFFER
        self.replay_buffer = Buffer(1000000, self.args.buffer_gpu)

        #if SA_FLAG:
        self.metrics = []
        self.last_portfolio = None
        self.T_max = 30
        self.T = self.T_max
        self.T_min = 0.2
        self.decay_rate = 0.975

        # Initialize population
        self.pop = self.manager.list()
        for _ in range(args.pop_size):
            wwid = self.genealogy.new_id('evo')
            if ALGO == 'SAC':
                self.pop.append(
                    GaussianPolicy(args.state_dim, args.action_dim,
                                   args.hidden_size, wwid))
            elif ALGO == 'TD3':
                self.pop.append(
                    Actor(args.state_dim, args.action_dim, wwid, ALGO))
                # use ALGO to distinguish differe net architecture
            elif ALGO == 'dis' or 'TD3_tennis':
                self.pop.append(
                    Actor(args.state_dim, args.action_dim, wwid, ALGO))
            else:
                assert False, "invalid algorithm type"

        if ALGO == "SAC":
            self.best_policy = GaussianPolicy(args.state_dim, args.action_dim,
                                              args.hidden_size, -1)
        else:
            self.best_policy = Actor(args.state_dim, args.action_dim, -1, ALGO)
            if ALGO == 'dis':
                self.average_policy = AverageActor(args.state_dim,
                                                   args.action_dim,
                                                   -2,
                                                   ALGO,
                                                   self.pop,
                                                   self.replay_buffer,
                                                   args.buffer_gpu,
                                                   args.batch_size,
                                                   iterations=10)
                self.average_policy.share_memory()

        self.best_policy.share_memory()

        # added by macheng, share the best policy accross processes (used as internal belief update models for blue)

        # now we assign shared blue_trainer, we should train this agent such that the roll_out workers are also up to date
        # should make sure that self.best_policy (emergent learner) is also shared
        if ALGO == 'dis' or 'TD3_tennis':
            assert hasattr(
                args, "blue_trainer"
            ), "must have blue_agent trainer to intialize rollout_worker, see line 109, class Parameter definition"
        if ALGO == 'dis':
            trainers = [args.blue_trainer, self.average_policy]
        else:
            trainers = [args.blue_trainer, None
                        ] if ALGO == 'TD3_tennis' else []

        self.trainers = trainers

        self.blue_dqn = args.blue_trainer

        # Turn off gradients and put in eval mod
        for actor in self.pop:
            actor = actor.cpu()
            actor.eval()
        # Intialize portfolio of learners
        self.portfolio = []
        self.portfolio = initialize_portfolio(self.portfolio, self.args,
                                              self.genealogy, PORTFOLIO_ID)
        self.complement_portfolio = [
        ]  #complementary of the portfolio, whatever not in the portfolio should be stored here
        self.total_rollout_bucket = self.manager.list(
        )  #macheng: we use total_rollout_bucket to represents the whole set of rollout models, now rollout_bukcet dynamically resize according to portforlio, for SA
        self.rollout_bucket = self.total_rollout_bucket
        #self.rollout_bucket = self.manager.list()
        #print("rollout_bucker needs to be updated, main.py line 239 ")
        for _ in range(len(self.portfolio)):
            if ALGO == 'SAC':
                self.rollout_bucket.append(
                    GaussianPolicy(args.state_dim, args.action_dim,
                                   args.hidden_size, -1))
            else:
                self.rollout_bucket.append(
                    Actor(args.state_dim, args.action_dim, -1, ALGO))
        # Initialize shared data bucket
        self.data_bucket = self.replay_buffer.tuples

        ############## MULTIPROCESSING TOOLS ###################
        # Evolutionary population Rollout workers
        self.evo_task_pipes = [Pipe() for _ in range(args.pop_size)]
        self.evo_result_pipes = [Pipe() for _ in range(args.pop_size)]
        self.evo_workers = [
            Process(target=rollout_worker,
                    args=(id, 0, self.evo_task_pipes[id][1],
                          self.evo_result_pipes[id][0], False,
                          self.data_bucket, self.pop, ENV_NAME, None, ALGO,
                          self.trainers)) for id in range(args.pop_size)
        ]
        for worker in self.evo_workers:
            worker.start()
        self.evo_flag = [True for _ in range(args.pop_size)]

        # Learner rollout workers
        self.task_pipes = [Pipe() for _ in range(args.rollout_size)]
        self.result_pipes = [Pipe() for _ in range(args.rollout_size)]
        self.workers = [
            Process(target=rollout_worker,
                    args=(id, 1, self.task_pipes[id][1],
                          self.result_pipes[id][0], True, self.data_bucket,
                          self.rollout_bucket, ENV_NAME, args.noise_std, ALGO,
                          self.trainers)) for id in range(args.rollout_size)
        ]
        for worker in self.workers:
            worker.start()
        self.roll_flag = [True for _ in range(args.rollout_size)]

        # Test bucket
        self.test_bucket = self.manager.list()
        if ALGO == 'SAC':
            self.test_bucket.append(
                GaussianPolicy(args.state_dim, args.action_dim,
                               args.hidden_size, -1))
        else:
            self.test_bucket.append(
                Actor(args.state_dim, args.action_dim, -1, ALGO))

        # 5 Test workers
        self.test_task_pipes = [Pipe() for _ in range(TEST_SIZE)]
        self.test_result_pipes = [Pipe() for _ in range(TEST_SIZE)]
        self.test_workers = [
            Process(target=rollout_worker,
                    args=(id, 2, self.test_task_pipes[id][1],
                          self.test_result_pipes[id][0], False, None,
                          self.test_bucket, ENV_NAME, args.noise_std, ALGO,
                          self.trainers)) for id in range(TEST_SIZE)
        ]
        for worker in self.test_workers:
            worker.start()
        self.test_flag = False

        # Meta-learning controller (Resource Distribution)
        self.allocation = [
        ]  #Allocation controls the resource allocation across learners
        for i in range(args.rollout_size):
            self.allocation.append(
                i % len(self.portfolio))  #Start uniformly (equal resources)
        # self.learner_stats = [{'fitnesses': [], 'ep_lens': [], 'value': 0.0, 'visit_count':0} for _ in range(len(self.portfolio))] #Track node statistsitic (each node is a learner), to compute UCB scores

        # Trackers
        self.best_score = -np.inf
        self.gen_frames = 0
        self.total_frames = 0
        self.best_shaped_score = None
        self.test_score = None
        self.test_std = None
Exemple #25
0
 def __getitem__(self, key):
     """Look up *key* on the wrapped Java message/map.

     The raw Java value is returned as-is, except that a Vert.x Java
     ``Buffer`` is wrapped in this module's Python ``Buffer`` class.
     """
     value = self.java_obj.get(key)
     if isinstance(value, org.vertx.java.core.buffer.Buffer):
         return Buffer(value)
     return value
Exemple #26
0
    def __init__(self, args, model_constructor, env_constructor,
                 observation_space, action_space, env, state_template,
                 test_envs, platform):
        """Build the mixed evolutionary/policy-gradient trainer.

        Sets up the (optionally process-shared) mixed population of Boltzmann
        chromosomes and network policies, the replay buffer, the portfolio of
        PG learners with its rollout bucket, all CSV progress trackers, and —
        when ``args.use_mp`` is set — the evolutionary and learner rollout
        worker processes.

        Args:
            args: experiment configuration (pop_size, ratio, rollout_size,
                buffer_size, use_mp, gpu, savetag, plot/aux folders, ...).
            model_constructor: factory for policy models; its ``state_dim`` is
                widened by 2 here to match the extra columns appended to
                ``state_template.x`` below.
            env_constructor: factory handed to rollout workers to build envs.
            observation_space: env observation space (not used directly here).
            action_space: env action space; forwarded to the replay buffer.
            env: main training environment handle.
            state_template: graph-like template whose node-feature matrix
                ``x`` gets a constant 2-wide block appended.
            test_envs: environments kept for generalization evaluation.
            platform: opaque platform handle, stored as-is.
        """
        self.args = args
        # Widen the network input to account for the two extra feature columns
        # concatenated onto state_template.x just below.
        model_constructor.state_dim += 2
        self.platform = platform

        self.policy_string = self.compute_policy_type()
        # Use CUDA only when both requested (args.gpu) and actually available.
        self.device = torch.device("cuda" if torch.cuda.is_available(
        ) else "cpu") if self.args.gpu else torch.device('cpu')

        #Evolution
        # Constant 2.0-valued placeholder columns appended per node
        # (presumably an initial DRAM-action encoding — TODO confirm against
        # MixedSSNE's expectations).
        dram_action = torch.ones((len(state_template.x), 2)) + 1
        state_template.x = torch.cat([state_template.x, dram_action], axis=1)
        self.evolver = MixedSSNE(
            self.args, state_template
        )  #GA(self.args) if args.boltzman else SSNE(self.args)
        self.env_constructor = env_constructor

        # CSV trackers: score/speedup curves keyed by generation, wall time,
        # champion, PG rollouts (noisy vs clean), and migration statistics.
        self.test_tracker = utils.Tracker(
            self.args.plot_folder,
            ['score_' + self.args.savetag, 'speedup_' + self.args.savetag],
            '.csv')  # Tracker class to log progress
        self.time_tracker = utils.Tracker(self.args.plot_folder, [
            'timed_score_' + self.args.savetag,
            'timed_speedup_' + self.args.savetag
        ], '.csv')
        self.champ_tracker = utils.Tracker(self.args.plot_folder, [
            'champ_score_' + self.args.savetag,
            'champ_speedup_' + self.args.savetag
        ], '.csv')
        self.pg_tracker = utils.Tracker(self.args.plot_folder, [
            'pg_noisy_speedup_' + self.args.savetag,
            'pg_clean_speedup_' + self.args.savetag
        ], '.csv')
        self.migration_tracker = utils.Tracker(self.args.plot_folder, [
            'selection_rate_' + self.args.savetag,
            'elite_rate_' + self.args.savetag
        ], '.csv')

        #Generalization Trackers
        # One tracker per held-out workload (r50/r101/bert by tag — presumed
        # ResNet-50/ResNet-101/BERT; verify against test_envs).
        self.r50_tracker = utils.Tracker(self.args.plot_folder, [
            'r50_score_' + self.args.savetag,
            'r50_speedup_' + self.args.savetag
        ], '.csv')
        self.r101_tracker = utils.Tracker(self.args.plot_folder, [
            'r101_score_' + self.args.savetag,
            'r101_speedup_' + self.args.savetag
        ], '.csv')
        self.bert_tracker = utils.Tracker(self.args.plot_folder, [
            'bert_score_' + self.args.savetag,
            'bert_speedup_' + self.args.savetag
        ], '.csv')

        # NOTE(review): the *_frames trackers reuse the exact column tags of
        # the trackers above and write to the same folder — confirm Tracker
        # disambiguates the output filenames, otherwise these collide.
        self.r50_frames_tracker = utils.Tracker(self.args.plot_folder, [
            'r50_score_' + self.args.savetag,
            'r50_speedup_' + self.args.savetag
        ], '.csv')
        self.r101_frames_tracker = utils.Tracker(self.args.plot_folder, [
            'r101_score_' + self.args.savetag,
            'r101_speedup_' + self.args.savetag
        ], '.csv')
        self.bert_frames_tracker = utils.Tracker(self.args.plot_folder, [
            'bert_score_' + self.args.savetag,
            'bert_speedup_' + self.args.savetag
        ], '.csv')

        #Genealogy tool
        self.genealogy = Genealogy()

        self.env = env
        self.test_envs = test_envs

        if self.args.use_mp:
            #MP TOOLS
            self.manager = Manager()
            #Initialize Mixed Population as a manager.list so rollout worker
            #processes observe population updates
            self.population = self.manager.list()

        else:
            self.population = []

        # Mixed population: args.ratio fraction Boltzmann chromosomes, the
        # remainder network policies.
        boltzman_count = int(args.pop_size * args.ratio)
        rest = args.pop_size - boltzman_count
        for _ in range(boltzman_count):
            self.population.append(
                BoltzmannChromosome(model_constructor.num_nodes,
                                    model_constructor.action_dim))

        for _ in range(rest):
            self.population.append(
                model_constructor.make_model(self.policy_string))
            self.population[-1].eval()  # inference (eval) mode

        #Save best policy
        self.best_policy = model_constructor.make_model(self.policy_string)

        #Init BUFFER
        self.replay_buffer = Buffer(args.buffer_size, state_template,
                                    action_space,
                                    args.aux_folder + args.savetag)
        # Shared handle to the buffer's raw experience tuples, filled by workers.
        self.data_bucket = self.replay_buffer.tuples

        #Initialize portfolio of learners (only when PG rollouts are enabled)
        self.portfolio = []
        if args.rollout_size > 0:
            self.portfolio = initialize_portfolio(self.portfolio, self.args,
                                                  self.genealogy,
                                                  args.portfolio_id,
                                                  model_constructor)

        #Initialize Rollout Bucket: one shared model slot per portfolio learner
        self.rollout_bucket = self.manager.list() if self.args.use_mp else []
        for _ in range(len(self.portfolio)):
            self.rollout_bucket.append(
                model_constructor.make_model(self.policy_string))

        if self.args.use_mp:
            ############## MULTIPROCESSING TOOLS ###################
            #Evolutionary population Rollout workers
            data_bucket = self.data_bucket if args.rollout_size > 0 else None  #If Strictly Evo - don't store data
            self.evo_task_pipes = [Pipe() for _ in range(args.pop_size)]
            self.evo_result_pipes = [Pipe() for _ in range(args.pop_size)]
            self.evo_workers = [
                Process(target=rollout_worker,
                        args=(id, 'evo', self.evo_task_pipes[id][1],
                              self.evo_result_pipes[id][0], data_bucket,
                              self.population, env_constructor))
                for id in range(args.pop_size)
            ]
            for worker in self.evo_workers:
                worker.start()

            #Learner rollout workers (type 'pg', drawing from rollout_bucket)
            self.task_pipes = [Pipe() for _ in range(args.rollout_size)]
            self.result_pipes = [Pipe() for _ in range(args.rollout_size)]
            self.workers = [
                Process(target=rollout_worker,
                        args=(id, 'pg', self.task_pipes[id][1],
                              self.result_pipes[id][0], data_bucket,
                              self.rollout_bucket, env_constructor))
                for id in range(args.rollout_size)
            ]
            for worker in self.workers:
                worker.start()

        # Flags marking which rollout/evo slots are free to accept a new job.
        self.roll_flag = [True for _ in range(args.rollout_size)]
        self.evo_flag = [True for _ in range(args.pop_size)]

        #Meta-learning controller (Resource Distribution)
        self.allocation = [
        ]  #Allocation controls the resource allocation across learners
        for i in range(args.rollout_size):
            self.allocation.append(
                i % len(self.portfolio))  #Start uniformly (equal resources)

        #Trackers
        self.best_score = -float('inf')
        self.gen_frames = 0
        self.total_frames = 0
        self.best_speedup = -float('inf')
        self.champ_type = None
Exemple #27
0
    def __init__(self, args):
        """Build the ERL trainer: evolving population, PG learner portfolio,
        shared replay buffer, and evo/learner/test rollout worker processes.

        Args:
            args: experiment configuration (state_dim, action_dim, hidden_size,
                pop_size, rollout_size, buffer_gpu, noise_std, ...).

        Relies on module-level constants ``ALGO``, ``ENV_NAME``,
        ``PORTFOLIO_ID`` and ``TEST_SIZE``.
        """
        self.args = args
        self.evolver = SSNE(self.args)

        #MP TOOLS
        self.manager = Manager()

        #Genealogy tool
        self.genealogy = Genealogy()

        #Initialize population as a manager.list so worker processes see updates
        self.pop = self.manager.list()
        for _ in range(args.pop_size):
            wwid = self.genealogy.new_id('evo')
            # SAC uses a Gaussian (stochastic) policy; otherwise a deterministic Actor.
            if ALGO == 'SAC':
                self.pop.append(
                    GaussianPolicy(args.state_dim, args.action_dim,
                                   args.hidden_size, wwid))
            else:
                self.pop.append(Actor(args.state_dim, args.action_dim, wwid))

        # wwid -1 is used for models outside the evolving population
        # (best_policy, rollout_bucket, test_bucket) with no genealogy id.
        if ALGO == "SAC":
            self.best_policy = GaussianPolicy(args.state_dim, args.action_dim,
                                              args.hidden_size, -1)
        else:
            self.best_policy = Actor(args.state_dim, args.action_dim, -1)

        #Turn off gradients and put in eval mode
        for actor in self.pop:
            actor = actor.cpu()
            actor.eval()

        #Init BUFFER (1M transitions; buffer_gpu controls device placement)
        self.replay_buffer = Buffer(1000000, self.args.buffer_gpu)

        #Initialize portfolio of learners
        self.portfolio = []
        self.portfolio = initialize_portfolio(self.portfolio, self.args,
                                              self.genealogy, PORTFOLIO_ID)
        # One shared rollout-model slot per learner in the portfolio.
        self.rollout_bucket = self.manager.list()
        for _ in range(len(self.portfolio)):
            if ALGO == 'SAC':
                self.rollout_bucket.append(
                    GaussianPolicy(args.state_dim, args.action_dim,
                                   args.hidden_size, -1))
            else:
                self.rollout_bucket.append(
                    Actor(args.state_dim, args.action_dim, -1))

        # Initialize shared data bucket (raw experience tuples written by workers)
        self.data_bucket = self.replay_buffer.tuples

        ############## MULTIPROCESSING TOOLS ###################

        #Evolutionary population Rollout workers (flag False, noise None —
        #cf. learner workers below, which pass True and args.noise_std)
        self.evo_task_pipes = [Pipe() for _ in range(args.pop_size)]
        self.evo_result_pipes = [Pipe() for _ in range(args.pop_size)]
        self.evo_workers = [
            Process(target=rollout_worker,
                    args=(id, self.evo_task_pipes[id][1],
                          self.evo_result_pipes[id][0], False,
                          self.data_bucket, self.pop, ENV_NAME, None, ALGO))
            for id in range(args.pop_size)
        ]
        for worker in self.evo_workers:
            worker.start()
        self.evo_flag = [True for _ in range(args.pop_size)]

        #Learner rollout workers, feeding experience into data_bucket
        self.task_pipes = [Pipe() for _ in range(args.rollout_size)]
        self.result_pipes = [Pipe() for _ in range(args.rollout_size)]
        self.workers = [
            Process(target=rollout_worker,
                    args=(id, self.task_pipes[id][1], self.result_pipes[id][0],
                          True, self.data_bucket, self.rollout_bucket,
                          ENV_NAME, args.noise_std, ALGO))
            for id in range(args.rollout_size)
        ]
        for worker in self.workers:
            worker.start()
        self.roll_flag = [True for _ in range(args.rollout_size)]

        #Test bucket: single slot holding the policy currently under evaluation
        self.test_bucket = self.manager.list()
        if ALGO == 'SAC':
            self.test_bucket.append(
                GaussianPolicy(args.state_dim, args.action_dim,
                               args.hidden_size, -1))
        else:
            self.test_bucket.append(Actor(args.state_dim, args.action_dim, -1))

        #TEST_SIZE test workers (data bucket None — experience is not stored)
        self.test_task_pipes = [Pipe() for _ in range(TEST_SIZE)]
        self.test_result_pipes = [Pipe() for _ in range(TEST_SIZE)]
        self.test_workers = [
            Process(target=rollout_worker,
                    args=(id, self.test_task_pipes[id][1],
                          self.test_result_pipes[id][0], False, None,
                          self.test_bucket, ENV_NAME, args.noise_std, ALGO))
            for id in range(TEST_SIZE)
        ]
        for worker in self.test_workers:
            worker.start()
        self.test_flag = False

        #Meta-learning controller (Resource Distribution)
        self.allocation = [
        ]  #Allocation controls the resource allocation across learners
        for i in range(args.rollout_size):
            self.allocation.append(
                i % len(self.portfolio))  #Start uniformly (equal resources)
        #self.learner_stats = [{'fitnesses': [], 'ep_lens': [], 'value': 0.0, 'visit_count':0} for _ in range(len(self.portfolio))] #Track node statistics (each node is a learner), to compute UCB scores

        #Trackers
        # NOTE(review): best_score starts at 0.0 here while a sibling variant
        # initializes it to -inf; for environments with negative returns the
        # champion would never update — confirm this is intended.
        self.best_score = 0.0
        self.gen_frames = 0
        self.total_frames = 0
        self.best_shaped_score = None
        self.test_score = None
        self.test_std = None
Exemple #28
0
 def gen_buffer(size):
     """Return a Python ``Buffer`` wrapping *size* random bytes produced by
     the Java test utilities."""
     random_java_buffer = org.vertx.java.framework.TestUtils.generateRandomBuffer(
         size)
     return Buffer(random_java_buffer)