def buffer(self): """Get the current buffer displayed in this window. Return (Buffer): The current buffer, None if there is no current buffer. """ return Buffer(symbols.win.get_buffer(self.struct))
def __init__(self, message):
    self.java_obj = message
    if isinstance(message.body, org.vertx.java.core.json.JsonObject):
        self.body = map_from_java(message.body.toMap())
    elif isinstance(message.body, org.vertx.java.core.buffer.Buffer):
        self.body = Buffer(message.body)
    else:
        self.body = map_from_java(message.body)
def __init__(self, args):
    self.args = args
    self.agents = [pg.TD3(args) for _ in range(self.args.num_rover)]

    # Load to GPU
    for ag in self.agents:
        ag.to_cuda()

    ###### Buffer holds each agent's own data, self-generated via its rollouts ######
    self.buffers = [Buffer() for _ in range(self.args.num_rover)]
    self.noise_gen = OU_handle.get_list_generators(NUM_WORKERS, args.action_dim)

    ######### MULTIPROCESSING TOOLS #########
    self.manager = Manager()
    self.data_bucket = [self.manager.list() for _ in range(args.num_rover)]  # Experience lists store experiences from all processes

    ######### TRAIN ROLLOUTS WITH ACTION NOISE #########
    self.models_bucket = self.manager.list()
    model_template = models.Actor(args)
    for _ in range(self.args.num_rover):
        self.models_bucket.append(models.Actor(args))

    self.task_pipes = [Pipe() for _ in range(NUM_WORKERS)]
    self.result_pipes = [Pipe() for _ in range(NUM_WORKERS)]
    self.train_workers = [
        Process(target=rollout_worker,
                args=(self.args, i, self.task_pipes[i][1],
                      self.result_pipes[i][0], self.noise_gen[i],
                      self.data_bucket, self.models_bucket, model_template))
        for i in range(NUM_WORKERS)
    ]
    for worker in self.train_workers:
        worker.start()

    ######### TEST ROLLOUT POLICY #########
    self.test_task_pipe = Pipe()
    self.test_result_pipe = Pipe()
    self.test_worker = Process(target=rollout_worker,
                               args=(self.args, 0, self.test_task_pipe[1],
                                     self.test_result_pipe[0], None,
                                     self.data_bucket, self.models_bucket,
                                     model_template))
    self.test_worker.start()

    #### STATS AND TRACKING WHICH ROLLOUT IS DONE ####
    self.best_policy = models.Actor(args)  # Best policy found by PF so far
    self.best_score = -999
    self.test_score = None
    self.test_eval_flag = True
    self.rollout_scores = [None for _ in range(NUM_WORKERS)]
    self.best_rollout_score = -999
    self.train_eval_flag = [True for _ in range(NUM_WORKERS)]
    self.update_budget = 0
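# A minimal, self-contained sketch of the Pipe-based dispatch pattern the
# trainer classes in this collection use (the worker body and the payloads
# sent over the pipes are illustrative assumptions, not taken from the real
# rollout_worker): the parent keeps one end of each task/result Pipe and
# hands the other end to a worker Process.
from multiprocessing import Pipe, Process

def _worker(task_conn, result_conn):
    task = task_conn.recv()        # block until the parent sends a task id
    result_conn.send(task * task)  # send back a dummy "score"

if __name__ == '__main__':
    task_pipes = [Pipe() for _ in range(2)]
    result_pipes = [Pipe() for _ in range(2)]
    workers = [Process(target=_worker,
                       args=(task_pipes[i][1], result_pipes[i][0]))
               for i in range(2)]
    for w in workers:
        w.start()
    for i in range(2):
        task_pipes[i][0].send(i)   # dispatch a task on the parent's end
    print([result_pipes[i][1].recv() for i in range(2)])  # collect results
    for w in workers:
        w.join()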
def __init__(self, CERL_agent, num_workers, trainers, pomdp_adv=False):
    # trainers: the first entry is the blue agent, the second is the red model
    self.num_workers = num_workers
    self.trainers = trainers
    self.pomdp_adv = pomdp_adv
    self.args = CERL_agent.args
    self.drqn = CERL_agent.args.drqn  # denotes whether blue uses DRQN
    if self.pomdp_adv:
        self.trainers = [trainers[0], None]  # make sure the red model is never used
    self.buffer_gpu = CERL_agent.args.buffer_gpu
    self.batch_size = CERL_agent.args.batch_size
    self.algo = CERL_agent.args.algo
    self.state_dim = CERL_agent.args.state_dim
    self.action_dim = CERL_agent.args.action_dim
    self.buffer = Buffer(BUFFER_SIZE, self.buffer_gpu)  # initialize own replay buffer
    self.data_bucket = self.buffer.tuples
    self.evo_task_pipes = [Pipe() for _ in range(self.num_workers)]
    self.evo_result_pipes = [Pipe() for _ in range(self.num_workers)]
    self.actual_red_worker = Actor(CERL_agent.args.state_dim,
                                   CERL_agent.args.action_dim, -1,
                                   'dis')  # this model is shared across the workers
    self.actual_red_worker.share_memory()
    self.td3args = {
        'policy_noise': 0.2,
        'policy_noise_clip': 0.5,
        'policy_ups_freq': 2,
        'action_low': CERL_agent.args.action_low,
        'action_high': CERL_agent.args.action_high,
        'cerl_args': self.args
    }
    self.renew_learner()  # we are not currently using a new learner for each iteration
    self.rollout_bucket = [self.actual_red_worker for _ in range(num_workers)]
    self.workers = [
        Process(target=rollout_worker,
                args=(id, 3, self.evo_task_pipes[id][1],
                      self.evo_result_pipes[id][0], False, self.data_bucket,
                      self.rollout_bucket, 'dummy_name', None, 'dis',
                      self.trainers, False, self.pomdp_adv))
        for id in range(num_workers)
    ]
    for worker in self.workers:
        worker.start()
    self.evo_flag = [True for _ in range(self.num_workers)]
def each(self, func):
    """Call func for every element of the set.

    Keyword arguments:
    @param func: The function to call.
    """
    iterator = self.java_obj.iterator()
    while iterator.hasNext():
        obj = iterator.next()
        if isinstance(obj, org.vertx.java.core.buffer.Buffer):
            obj = Buffer(obj)
        func(obj)
def __init__(self, wwid, algo_name, state_dim, action_dim, actor_lr,
             critic_lr, gamma, tau, init_w=True, **td3args):
    self.td3args = td3args
    self.id = wwid
    self.wwid = wwid
    self.algo = Off_Policy_Algo(wwid, algo_name, state_dim, action_dim,
                                actor_lr, critic_lr, gamma, tau, init_w)
    self.args = td3args['cerl_args']

    # LEARNER STATISTICS
    self.fitnesses = []
    self.ep_lens = []
    self.value = None
    self.visit_count = 0
    self.private_replay_buffer = Buffer(1000000, self.args.buffer_gpu)
def _init(self, bufptr):
    new = self.instclass()
    self.instances[id(new)] = new
    if hasattr(new, "init"):
        rtn = new.init(Buffer(bufptr))
        if isinstance(rtn, int):
            return rtn
    return id(new)
def __init__(self, args, model_constructor, env_constructor):
    self.args = args

    # MP TOOLS
    self.manager = Manager()

    # Algo
    sac_keyargs = {}
    sac_keyargs['autotune'] = args.autotune
    sac_keyargs['entropy'] = True
    self.algo = SAC(args, model_constructor, args.gamma, **sac_keyargs)

    # #Save best policy
    # self.best_policy = model_constructor.make_model('actor')

    # Init BUFFER
    self.replay_buffer = Buffer(args.buffer_size)
    self.data_bucket = self.replay_buffer.tuples

    # Initialize Rollout Bucket
    self.rollout_bucket = self.manager.list()
    self.rollout_bucket.append(model_constructor.make_model('Gaussian_FF'))

    ############## MULTIPROCESSING TOOLS ###################
    # Learner rollout workers
    self.task_pipes = [Pipe() for _ in range(args.rollout_size)]
    self.result_pipes = [Pipe() for _ in range(args.rollout_size)]
    self.workers = [
        Process(target=rollout_worker,
                args=(id, 'pg', self.task_pipes[id][1],
                      self.result_pipes[id][0], self.data_bucket,
                      self.rollout_bucket, env_constructor))
        for id in range(args.rollout_size)
    ]
    for worker in self.workers:
        worker.start()
    self.roll_flag = [True for _ in range(args.rollout_size)]

    # Test bucket
    self.test_bucket = self.manager.list()
    self.test_bucket.append(model_constructor.make_model('Gaussian_FF'))

    # Test workers
    self.test_task_pipes = [Pipe() for _ in range(env_constructor.dummy_env.test_size)]
    self.test_result_pipes = [Pipe() for _ in range(env_constructor.dummy_env.test_size)]
    self.test_workers = [
        Process(target=rollout_worker,
                args=(id, 'test', self.test_task_pipes[id][1],
                      self.test_result_pipes[id][0], None, self.test_bucket,
                      env_constructor))
        for id in range(env_constructor.dummy_env.test_size)
    ]
    for worker in self.test_workers:
        worker.start()
    self.test_flag = False

    # Trackers
    self.best_score = 0.0
    self.gen_frames = 0
    self.total_frames = 0
    self.test_score = None
    self.test_std = None
    self.test_trace = []
    self.rollout_fits_trace = []
    self.ep_len = 0
    self.r1_reward = 0
    self.num_footsteps = 0
    self.best_shaped_score = 0.0
async def test_get_frame_complex():
    buffer = Buffer()
    last_time = await buffer.get_last_time()

    # Derive the start time as a function of the
    # end time minus the window size
    start = round(last_time - (int(DC.window_size) * 60))

    assets = await buffer.get_random_assets(DC.asset_num)
    f = await buffer.get_frame_complex(start, last_time, ['ETH'])
    print(f[[
        'ask_price_0', 'ask_quantity_0', 'ask_price_1', 'ask_quantity_1',
        'close'
    ]].to_string())
def map_from_vertx(value):
    """Converts a Vert.x type to a Jython type."""
    if value is None:
        return value
    if isinstance(value, Map):
        return map_map_from_java(value)
    elif isinstance(value, Set):
        return map_set_from_java(value)
    elif isinstance(value, Collection):
        return map_collection_from_java(value)
    elif isinstance(value, org.vertx.java.core.json.JsonObject):
        return map_object_from_java(value)
    elif isinstance(value, org.vertx.java.core.json.JsonArray):
        return map_array_from_java(value)
    elif isinstance(value, org.vertx.java.core.buffer.Buffer):
        return Buffer(value)
    return value
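# A hypothetical usage sketch: map_from_vertx is applied to a raw Java value,
# such as an event-bus message body (as the Message wrapper above does), so
# that Python code only ever sees native Jython types. The handler name and
# the message attribute here are assumptions.
def on_message(java_message):
    body = map_from_vertx(java_message.body)  # dict, list, set, Buffer, or primitive
    return body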
def __init__(self, args, id):
    self.args = args
    self.id = id

    ### Initialize neuroevolution module ###
    self.evolver = SSNE(self.args)

    ######## Initialize population ########
    self.manager = Manager()
    self.popn = self.manager.list()
    for _ in range(args.popn_size):
        self.popn.append(
            MultiHeadActor(args.state_dim, args.action_dim, args.hidden_size,
                           args.config.num_agents))
        self.popn[-1].eval()

    #### INITIALIZE PG ALGO ####
    if self.args.is_matd3 or args.is_maddpg:
        algo_name = 'TD3' if self.args.is_matd3 else 'DDPG'
        self.algo = MATD3(id, algo_name, args.state_dim, args.action_dim,
                          args.hidden_size, args.actor_lr, args.critic_lr,
                          args.gamma, args.tau, args.savetag, args.aux_save,
                          args.use_gpu, args.config.num_agents, args.init_w)
    else:
        self.algo = MultiTD3(id, 'TD3', args.state_dim, args.action_dim,
                             args.hidden_size, args.actor_lr, args.critic_lr,
                             args.gamma, args.tau, args.savetag,
                             args.aux_save, args.use_gpu,
                             args.config.num_agents, args.init_w)

    #### Rollout Actor is a template used for MP ####
    self.rollout_actor = self.manager.list()
    self.rollout_actor.append(
        MultiHeadActor(args.state_dim, args.action_dim, args.hidden_size,
                       args.config.num_agents))

    # Initialize buffer
    self.buffer = [
        Buffer(args.buffer_size, buffer_gpu=False)
        for _ in range(args.config.num_agents)
    ]

    # Agent metrics
    self.fitnesses = [[] for _ in range(args.popn_size)]

    ### Best Policy HOF ###
    self.champ_ind = 0
def __init__(self, logger, redis_host, redis_port, redis_db):
    DataBase.__init__(self, logger, redis_host, redis_port, redis_db)
    self.pubsub = pubsub.Pubsub(
        logger, redis.Redis(host=redis_host, port=redis_port, db=redis_db))
    self.balancer = balancer.Balancer(logger, self.rc, self.pubsub)
    self.cache = cache.Cache(logger, self.rc)
    self.provider = {
        'facebook': provider_data.ProviderData(self.rc, 'facebook'),
        'twitter': provider_data.ProviderData(self.rc, 'twitter'),
        'tumblr': provider_data.ProviderData(self.rc, 'tumblr'),
        'flickr': provider_data.ProviderData(self.rc, 'flickr'),
        '500px': provider_data.ProviderData(self.rc, '500px'),
        'linkedin': provider_data.ProviderData(self.rc, 'linkedin'),
    }
    self.facebook = self.provider['facebook']
    self.twitter = self.provider['twitter']
    self.tumblr = self.provider['tumblr']
    self.flickr = self.provider['flickr']
    self.px500 = self.provider['500px']
    self.linkedin = self.provider['linkedin']
    self.filter = FilterData(self.rc)
    self.buffer = Buffer(logger, self.rc, self.pubsub)
def buffer(self): """Return the buffer this cursor is associated with.""" return Buffer(symbols.cursor.get_buffer(self.struct))
def handle(self, buffer): """Call the handler after buffer parsed""" self.handler(Buffer(buffer))
sys.path.insert(0, parentdir)

import constants.environment as env_const
from core.exceptions import InvalidUsage, RouteNotFound
from core.buffer import Buffer
from core.test_buffer import TestBuffer
from core.registry import Registry

########## App setup ##########
app = Application()
r = app.router

########## Instantiate globals ##########
env_type = os.environ.get('ENV_TYPE')
if env_type == "production":
    buffer = Buffer()
else:
    buffer = TestBuffer()

registry = Registry(buffer=buffer)

########## Error handling ##########
def get_required_param(json, param):
    if json is None:
        logger.info("Request is not a valid json")
        raise InvalidUsage("Request is not a valid json")
    value = json.get(param, None)
    if (value is None) or (value == '') or (value == []):
        logger.info("A required request parameter '{}' had value {}".format(
def map_buffer_from_java(obj):
    """Converts a Buffer to a Python Buffer."""
    return Buffer(obj)
def converter(buffer):
    return Buffer(buffer)
def handle(self, buffer):
    self.handler(Buffer(buffer))
def data(self):
    """Returns the received data as a Buffer."""
    if self.buffer is None:
        self.buffer = Buffer(self.packet.data())
    return self.buffer
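# A minimal sketch of the same lazy-caching idea in isolation (the class and
# names here are illustrative, not from the source): the wrapped value is
# built once on first access and reused on every later access.
class Packet:
    def __init__(self, raw):
        self._raw = raw
        self._buffer = None  # cache slot, filled on first access

    @property
    def data(self):
        if self._buffer is None:           # first access: build and cache
            self._buffer = bytearray(self._raw)
        return self._buffer                # later accesses reuse the cache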
def __init__(self, args, model_constructor, env_constructor):
    self.args = args
    self.policy_string = self.compute_policy_type()

    # Evolution
    self.evolver = SSNE(self.args)

    # MP TOOLS
    self.manager = Manager()

    # Genealogy tool
    self.genealogy = Genealogy()

    # Initialize population
    self.population = self.manager.list()
    seed = True
    for _ in range(args.pop_size):
        self.population.append(
            model_constructor.make_model(self.policy_string, seed=seed))
        seed = False

    # SEED
    # self.population[0].load_state_dict(torch.load('Results/Auxiliary/_bestcerl_td3_s2019_roll10_pop10_portfolio10'))

    # Save best policy
    self.best_policy = model_constructor.make_model(self.policy_string)

    # Turn off gradients and put in eval mode
    for actor in self.population:
        actor = actor.cpu()
        actor.eval()

    # Init BUFFER
    self.replay_buffer = Buffer(args.buffer_size)
    self.data_bucket = self.replay_buffer.tuples

    # Initialize portfolio of learners
    self.portfolio = []
    self.portfolio = initialize_portfolio(self.portfolio, self.args,
                                          self.genealogy, args.portfolio_id,
                                          model_constructor)

    # Initialize Rollout Bucket
    self.rollout_bucket = self.manager.list()
    for _ in range(len(self.portfolio)):
        self.rollout_bucket.append(
            model_constructor.make_model(self.policy_string))

    ############## MULTIPROCESSING TOOLS ###################
    # Evolutionary population rollout workers
    self.evo_task_pipes = [Pipe() for _ in range(args.pop_size)]
    self.evo_result_pipes = [Pipe() for _ in range(args.pop_size)]
    self.evo_workers = [
        Process(target=rollout_worker,
                args=(id, 'evo', self.evo_task_pipes[id][1],
                      self.evo_result_pipes[id][0], self.data_bucket,
                      self.population, env_constructor))
        for id in range(args.pop_size)
    ]
    for worker in self.evo_workers:
        worker.start()
    self.evo_flag = [True for _ in range(args.pop_size)]

    # Learner rollout workers
    self.task_pipes = [Pipe() for _ in range(args.rollout_size)]
    self.result_pipes = [Pipe() for _ in range(args.rollout_size)]
    self.workers = [
        Process(target=rollout_worker,
                args=(id, 'pg', self.task_pipes[id][1],
                      self.result_pipes[id][0], self.data_bucket,
                      self.rollout_bucket, env_constructor))
        for id in range(args.rollout_size)
    ]
    for worker in self.workers:
        worker.start()
    self.roll_flag = [True for _ in range(args.rollout_size)]

    # Test bucket
    self.test_bucket = self.manager.list()
    self.test_bucket.append(model_constructor.make_model(self.policy_string))

    # Test workers
    self.test_task_pipes = [Pipe() for _ in range(env_constructor.dummy_env.test_size)]
    self.test_result_pipes = [Pipe() for _ in range(env_constructor.dummy_env.test_size)]
    self.test_workers = [
        Process(target=rollout_worker,
                args=(id, 'test', self.test_task_pipes[id][1],
                      self.test_result_pipes[id][0], None, self.test_bucket,
                      env_constructor))
        for id in range(env_constructor.dummy_env.test_size)
    ]
    for worker in self.test_workers:
        worker.start()
    self.test_flag = False

    # Meta-learning controller (resource distribution)
    self.allocation = []  # Allocation controls the resource allocation across learners
    for i in range(args.rollout_size):
        self.allocation.append(i % len(self.portfolio))  # Start uniformly (equal resources)

    # Trackers
    self.best_score = 0.0
    self.gen_frames = 0
    self.total_frames = 0
    self.test_score = None
    self.test_std = None
    self.best_r1_score = 0.0
    self.ep_len = 0
    self.r1_reward = 0
    self.num_footsteps = 0
    self.test_trace = []
def __init__(self, args, id):
    self.args = args
    self.id = id

    ### Initialize neuroevolution module ###
    self.evolver = SSNE(self.args)

    ######## Initialize population ########
    self.manager = Manager()
    self.popn = self.manager.list()
    for _ in range(args.popn_size):
        if args.ps == 'trunk':
            self.popn.append(
                MultiHeadActor(args.state_dim, args.action_dim,
                               args.hidden_size, args.config.num_agents))
        else:
            if args.algo_name == 'TD3':
                self.popn.append(
                    Actor(args.state_dim, args.action_dim, args.hidden_size,
                          policy_type='DeterministicPolicy'))
            else:
                self.popn.append(
                    Actor(args.state_dim, args.action_dim, args.hidden_size,
                          policy_type='GaussianPolicy'))
        self.popn[-1].eval()

    #### INITIALIZE PG ALGO ####
    if args.ps == 'trunk':
        if self.args.is_matd3 or args.is_maddpg:
            algo_name = 'TD3' if self.args.is_matd3 else 'DDPG'
            self.algo = MATD3(id, algo_name, args.state_dim, args.action_dim,
                              args.hidden_size, args.actor_lr, args.critic_lr,
                              args.gamma, args.tau, args.savetag,
                              args.aux_save, args.actualize, args.use_gpu,
                              args.config.num_agents, args.init_w)
        else:
            self.algo = MultiTD3(id, args.algo_name, args.state_dim,
                                 args.action_dim, args.hidden_size,
                                 args.actor_lr, args.critic_lr, args.gamma,
                                 args.tau, args.savetag, args.aux_save,
                                 args.actualize, args.use_gpu,
                                 args.config.num_agents, args.init_w)
    else:
        if args.algo_name == 'TD3':
            self.algo = TD3(id, args.algo_name, args.state_dim,
                            args.action_dim, args.hidden_size, args.actor_lr,
                            args.critic_lr, args.gamma, args.tau,
                            args.savetag, args.aux_save, args.actualize,
                            args.use_gpu, args.init_w)
        else:
            self.algo = SAC(id, args.state_dim, args.action_dim,
                            args.hidden_size, args.gamma, args.critic_lr,
                            args.actor_lr, args.tau, args.alpha,
                            args.target_update_interval, args.savetag,
                            args.aux_save, args.actualize, args.use_gpu)

    #### Rollout Actor is a template used for MP ####
    self.rollout_actor = self.manager.list()
    if args.ps == 'trunk':
        self.rollout_actor.append(
            MultiHeadActor(args.state_dim, args.action_dim, args.hidden_size,
                           args.config.num_agents))
    else:
        if args.algo_name == 'TD3':
            self.rollout_actor.append(
                Actor(args.state_dim, args.action_dim, args.hidden_size,
                      policy_type='DeterministicPolicy'))
        else:
            self.rollout_actor.append(
                Actor(args.state_dim, args.action_dim, args.hidden_size,
                      policy_type='GaussianPolicy'))

    # Initialize buffer
    if args.ps == 'trunk':
        self.buffer = [
            Buffer(args.buffer_size, buffer_gpu=False, filter_c=args.filter_c)
            for _ in range(args.config.num_agents)
        ]
    else:
        self.buffer = Buffer(args.buffer_size, buffer_gpu=False,
                             filter_c=args.filter_c)

    # Agent metrics
    self.fitnesses = [[] for _ in range(args.popn_size)]

    ### Best Policy HOF ###
    self.champ_ind = 0
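# A hypothetical convenience helper (not in the source) to smooth over the two
# buffer layouts set up above: the 'trunk' branch keeps one Buffer per agent
# in a list, while the other branch keeps a single shared Buffer.
def get_buffer(self, agent_id):
    if isinstance(self.buffer, list):  # 'trunk': per-agent buffers
        return self.buffer[agent_id]
    return self.buffer                 # shared: same buffer for every agent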
def __init__(self, args, model_constructor, env_constructor):
    self.args = args

    # MP TOOLS
    self.manager = Manager()

    # Algo
    self.algo = TD3(model_constructor,
                    actor_lr=args.actor_lr,
                    critic_lr=args.critic_lr,
                    gamma=args.gamma,
                    tau=args.tau,
                    policy_noise=0.1,
                    policy_noise_clip=0.2,
                    policy_ups_freq=2)

    # Save best policy
    self.best_policy = model_constructor.make_model('Gaussian_FF')
    self.best_policy.stochastic = False

    # Init BUFFER
    self.replay_buffer = Buffer(args.buffer_size)
    self.data_bucket = self.replay_buffer.tuples

    # Initialize Rollout Bucket
    self.rollout_bucket = self.manager.list()
    self.rollout_bucket.append(model_constructor.make_model('Gaussian_FF'))
    for actor in self.rollout_bucket:
        actor.stochastic = False
        actor.eval()

    ############## MULTIPROCESSING TOOLS ###################
    # Learner rollout workers
    self.task_pipes = [Pipe() for _ in range(args.rollout_size)]
    self.result_pipes = [Pipe() for _ in range(args.rollout_size)]
    self.workers = [
        Process(target=rollout_worker,
                args=(id, 'pg', self.task_pipes[id][1],
                      self.result_pipes[id][0], self.data_bucket,
                      self.rollout_bucket, env_constructor))
        for id in range(args.rollout_size)
    ]
    for worker in self.workers:
        worker.start()
    self.roll_flag = [True for _ in range(args.rollout_size)]

    # Test bucket
    self.test_bucket = self.manager.list()
    self.test_bucket.append(model_constructor.make_model('Gaussian_FF'))
    for actor in self.test_bucket:
        actor.stochastic = False
        actor.eval()

    # Test workers
    self.test_task_pipes = [Pipe() for _ in range(env_constructor.dummy_env.test_size)]
    self.test_result_pipes = [Pipe() for _ in range(env_constructor.dummy_env.test_size)]
    self.test_workers = [
        Process(target=rollout_worker,
                args=(id, 'test', self.test_task_pipes[id][1],
                      self.test_result_pipes[id][0], None, self.test_bucket,
                      env_constructor))
        for id in range(env_constructor.dummy_env.test_size)
    ]
    for worker in self.test_workers:
        worker.start()
    self.test_flag = False

    # Trackers
    self.best_score = 0.0
    self.gen_frames = 0
    self.total_frames = 0
    self.test_score = None
    self.test_std = None
    self.test_trace = []
    self.ep_len = 0
    self.r1_reward = 0
    self.num_footsteps = 0
def __init__(self, args, model_constructor, env_constructor):
    self.args = args
    self.policy_string = 'CategoricalPolicy' if env_constructor.is_discrete else 'Gaussian_FF'
    self.manager = Manager()
    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Evolution
    self.evolver = SSNE(self.args)

    # Initialize population
    self.population = self.manager.list()
    for _ in range(args.pop_size):
        self.population.append(
            model_constructor.make_model(self.policy_string))

    # Save best policy
    self.best_policy = model_constructor.make_model(self.policy_string)

    # PG Learner
    if env_constructor.is_discrete:
        from algos.ddqn import DDQN
        self.learner = DDQN(args, model_constructor)
    else:
        from algos.sac import SAC
        self.learner = SAC(args, model_constructor)

    # Replay Buffer
    self.replay_buffer = Buffer(args.buffer_size)

    # Initialize Rollout Bucket
    self.rollout_bucket = self.manager.list()
    for _ in range(args.rollout_size):
        self.rollout_bucket.append(
            model_constructor.make_model(self.policy_string))

    ############## MULTIPROCESSING TOOLS ###################
    # Evolutionary population rollout workers
    self.evo_task_pipes = [Pipe() for _ in range(args.pop_size)]
    self.evo_result_pipes = [Pipe() for _ in range(args.pop_size)]
    self.evo_workers = [
        Process(target=rollout_worker,
                args=(id, 'evo', self.evo_task_pipes[id][1],
                      self.evo_result_pipes[id][0], args.rollout_size > 0,
                      self.population, env_constructor))
        for id in range(args.pop_size)
    ]
    for worker in self.evo_workers:
        worker.start()
    self.evo_flag = [True for _ in range(args.pop_size)]

    # Learner rollout workers
    self.task_pipes = [Pipe() for _ in range(args.rollout_size)]
    self.result_pipes = [Pipe() for _ in range(args.rollout_size)]
    self.workers = [
        Process(target=rollout_worker,
                args=(id, 'pg', self.task_pipes[id][1],
                      self.result_pipes[id][0], True, self.rollout_bucket,
                      env_constructor))
        for id in range(args.rollout_size)
    ]
    for worker in self.workers:
        worker.start()
    self.roll_flag = [True for _ in range(args.rollout_size)]

    # Test bucket
    self.test_bucket = self.manager.list()
    self.test_bucket.append(model_constructor.make_model(self.policy_string))

    # Test workers
    self.test_task_pipes = [Pipe() for _ in range(args.num_test)]
    self.test_result_pipes = [Pipe() for _ in range(args.num_test)]
    self.test_workers = [
        Process(target=rollout_worker,
                args=(id, 'test', self.test_task_pipes[id][1],
                      self.test_result_pipes[id][0], False, self.test_bucket,
                      env_constructor))
        for id in range(args.num_test)
    ]
    for worker in self.test_workers:
        worker.start()
    self.test_flag = False

    # Trackers
    self.best_score = -float('inf')
    self.gen_frames = 0
    self.total_frames = 0
    self.test_score = None
    self.test_std = None
def __init__(self, args):
    # Need to initialize rollout workers to have a blue agent
    self.args = args
    self.evolver = SSNE(self.args)  # this evolver implements neuroevolution

    # MP TOOLS
    self.manager = Manager()
    self.mutate_algos = [
        Mutation_Add(self),
        Mutation_Delete(self),
        Mutation_Exchange(self)
    ]  # store all the mutation algorithm objects

    # Genealogy tool
    self.genealogy = Genealogy()

    # Init BUFFER
    self.replay_buffer = Buffer(1000000, self.args.buffer_gpu)

    # if SA_FLAG:
    self.metrics = []
    self.last_portfolio = None
    self.T_max = 30
    self.T = self.T_max
    self.T_min = 0.2
    self.decay_rate = 0.975

    # Initialize population
    self.pop = self.manager.list()
    for _ in range(args.pop_size):
        wwid = self.genealogy.new_id('evo')
        if ALGO == 'SAC':
            self.pop.append(
                GaussianPolicy(args.state_dim, args.action_dim,
                               args.hidden_size, wwid))
        elif ALGO == 'TD3':
            self.pop.append(
                Actor(args.state_dim, args.action_dim, wwid,
                      ALGO))  # use ALGO to distinguish different network architectures
        elif ALGO in ('dis', 'TD3_tennis'):
            self.pop.append(Actor(args.state_dim, args.action_dim, wwid, ALGO))
        else:
            assert False, "invalid algorithm type"

    if ALGO == "SAC":
        self.best_policy = GaussianPolicy(args.state_dim, args.action_dim,
                                          args.hidden_size, -1)
    else:
        self.best_policy = Actor(args.state_dim, args.action_dim, -1, ALGO)

    if ALGO == 'dis':
        self.average_policy = AverageActor(args.state_dim, args.action_dim,
                                           -2, ALGO, self.pop,
                                           self.replay_buffer,
                                           args.buffer_gpu, args.batch_size,
                                           iterations=10)
        self.average_policy.share_memory()

    self.best_policy.share_memory()  # added by macheng, share the best policy across processes (used as internal belief-update models for blue)

    # Now we assign a shared blue_trainer; we should train this agent so that the rollout workers are also up to date
    # Should make sure that self.best_policy (the emergent learner) is also shared
    if ALGO in ('dis', 'TD3_tennis'):
        assert hasattr(args, "blue_trainer"), \
            "must have blue_agent trainer to initialize rollout_worker, see line 109, class Parameter definition"
    if ALGO == 'dis':
        trainers = [args.blue_trainer, self.average_policy]
    else:
        trainers = [args.blue_trainer, None] if ALGO == 'TD3_tennis' else []
    self.trainers = trainers
    self.blue_dqn = args.blue_trainer

    # Turn off gradients and put in eval mode
    for actor in self.pop:
        actor = actor.cpu()
        actor.eval()

    # Initialize portfolio of learners
    self.portfolio = []
    self.portfolio = initialize_portfolio(self.portfolio, self.args,
                                          self.genealogy, PORTFOLIO_ID)
    self.complement_portfolio = []  # complement of the portfolio; whatever is not in the portfolio is stored here
    self.total_rollout_bucket = self.manager.list()  # macheng: total_rollout_bucket represents the whole set of rollout models; rollout_bucket now dynamically resizes according to the portfolio, for SA
    self.rollout_bucket = self.total_rollout_bucket
    # self.rollout_bucket = self.manager.list()
    # print("rollout_bucket needs to be updated, main.py line 239")
    for _ in range(len(self.portfolio)):
        if ALGO == 'SAC':
            self.rollout_bucket.append(
                GaussianPolicy(args.state_dim, args.action_dim,
                               args.hidden_size, -1))
        else:
            self.rollout_bucket.append(
                Actor(args.state_dim, args.action_dim, -1, ALGO))

    # Initialize shared data bucket
    self.data_bucket = self.replay_buffer.tuples

    ############## MULTIPROCESSING TOOLS ###################
    # Evolutionary population rollout workers
    self.evo_task_pipes = [Pipe() for _ in range(args.pop_size)]
    self.evo_result_pipes = [Pipe() for _ in range(args.pop_size)]
    self.evo_workers = [
        Process(target=rollout_worker,
                args=(id, 0, self.evo_task_pipes[id][1],
                      self.evo_result_pipes[id][0], False, self.data_bucket,
                      self.pop, ENV_NAME, None, ALGO, self.trainers))
        for id in range(args.pop_size)
    ]
    for worker in self.evo_workers:
        worker.start()
    self.evo_flag = [True for _ in range(args.pop_size)]

    # Learner rollout workers
    self.task_pipes = [Pipe() for _ in range(args.rollout_size)]
    self.result_pipes = [Pipe() for _ in range(args.rollout_size)]
    self.workers = [
        Process(target=rollout_worker,
                args=(id, 1, self.task_pipes[id][1], self.result_pipes[id][0],
                      True, self.data_bucket, self.rollout_bucket, ENV_NAME,
                      args.noise_std, ALGO, self.trainers))
        for id in range(args.rollout_size)
    ]
    for worker in self.workers:
        worker.start()
    self.roll_flag = [True for _ in range(args.rollout_size)]

    # Test bucket
    self.test_bucket = self.manager.list()
    if ALGO == 'SAC':
        self.test_bucket.append(
            GaussianPolicy(args.state_dim, args.action_dim,
                           args.hidden_size, -1))
    else:
        self.test_bucket.append(Actor(args.state_dim, args.action_dim, -1, ALGO))

    # Test workers
    self.test_task_pipes = [Pipe() for _ in range(TEST_SIZE)]
    self.test_result_pipes = [Pipe() for _ in range(TEST_SIZE)]
    self.test_workers = [
        Process(target=rollout_worker,
                args=(id, 2, self.test_task_pipes[id][1],
                      self.test_result_pipes[id][0], False, None,
                      self.test_bucket, ENV_NAME, args.noise_std, ALGO,
                      self.trainers))
        for id in range(TEST_SIZE)
    ]
    for worker in self.test_workers:
        worker.start()
    self.test_flag = False

    # Meta-learning controller (resource distribution)
    self.allocation = []  # Allocation controls the resource allocation across learners
    for i in range(args.rollout_size):
        self.allocation.append(i % len(self.portfolio))  # Start uniformly (equal resources)

    # self.learner_stats = [{'fitnesses': [], 'ep_lens': [], 'value': 0.0, 'visit_count': 0} for _ in range(len(self.portfolio))]  # Track node statistics (each node is a learner) to compute UCB scores

    # Trackers
    self.best_score = -np.inf
    self.gen_frames = 0
    self.total_frames = 0
    self.best_shaped_score = None
    self.test_score = None
    self.test_std = None
def __getitem__(self, key):
    obj = self.java_obj.get(key)
    if isinstance(obj, org.vertx.java.core.buffer.Buffer):
        obj = Buffer(obj)
    return obj
def __init__(self, args, model_constructor, env_constructor,
             observation_space, action_space, env, state_template, test_envs,
             platform):
    self.args = args
    model_constructor.state_dim += 2
    self.platform = platform
    self.policy_string = self.compute_policy_type()
    self.device = torch.device("cuda" if torch.cuda.is_available()
                               else "cpu") if self.args.gpu else torch.device('cpu')

    # Evolution
    dram_action = torch.ones((len(state_template.x), 2)) + 1
    state_template.x = torch.cat([state_template.x, dram_action], axis=1)
    self.evolver = MixedSSNE(self.args, state_template)  # GA(self.args) if args.boltzman else SSNE(self.args)
    self.env_constructor = env_constructor

    # Tracker classes to log progress
    self.test_tracker = utils.Tracker(
        self.args.plot_folder,
        ['score_' + self.args.savetag, 'speedup_' + self.args.savetag],
        '.csv')
    self.time_tracker = utils.Tracker(self.args.plot_folder, [
        'timed_score_' + self.args.savetag,
        'timed_speedup_' + self.args.savetag
    ], '.csv')
    self.champ_tracker = utils.Tracker(self.args.plot_folder, [
        'champ_score_' + self.args.savetag,
        'champ_speedup_' + self.args.savetag
    ], '.csv')
    self.pg_tracker = utils.Tracker(self.args.plot_folder, [
        'pg_noisy_speedup_' + self.args.savetag,
        'pg_clean_speedup_' + self.args.savetag
    ], '.csv')
    self.migration_tracker = utils.Tracker(self.args.plot_folder, [
        'selection_rate_' + self.args.savetag,
        'elite_rate_' + self.args.savetag
    ], '.csv')

    # Generalization trackers
    self.r50_tracker = utils.Tracker(self.args.plot_folder, [
        'r50_score_' + self.args.savetag, 'r50_speedup_' + self.args.savetag
    ], '.csv')
    self.r101_tracker = utils.Tracker(self.args.plot_folder, [
        'r101_score_' + self.args.savetag, 'r101_speedup_' + self.args.savetag
    ], '.csv')
    self.bert_tracker = utils.Tracker(self.args.plot_folder, [
        'bert_score_' + self.args.savetag, 'bert_speedup_' + self.args.savetag
    ], '.csv')
    self.r50_frames_tracker = utils.Tracker(self.args.plot_folder, [
        'r50_score_' + self.args.savetag, 'r50_speedup_' + self.args.savetag
    ], '.csv')
    self.r101_frames_tracker = utils.Tracker(self.args.plot_folder, [
        'r101_score_' + self.args.savetag, 'r101_speedup_' + self.args.savetag
    ], '.csv')
    self.bert_frames_tracker = utils.Tracker(self.args.plot_folder, [
        'bert_score_' + self.args.savetag, 'bert_speedup_' + self.args.savetag
    ], '.csv')

    # Genealogy tool
    self.genealogy = Genealogy()

    self.env = env
    self.test_envs = test_envs

    if self.args.use_mp:
        # MP TOOLS
        self.manager = Manager()
        # Initialize mixed population
        self.population = self.manager.list()
    else:
        self.population = []

    boltzman_count = int(args.pop_size * args.ratio)
    rest = args.pop_size - boltzman_count
    for _ in range(boltzman_count):
        self.population.append(
            BoltzmannChromosome(model_constructor.num_nodes,
                                model_constructor.action_dim))
    for _ in range(rest):
        self.population.append(
            model_constructor.make_model(self.policy_string))
        self.population[-1].eval()

    # Save best policy
    self.best_policy = model_constructor.make_model(self.policy_string)

    # Init BUFFER
    self.replay_buffer = Buffer(args.buffer_size, state_template,
                                action_space, args.aux_folder + args.savetag)
    self.data_bucket = self.replay_buffer.tuples

    # Initialize portfolio of learners
    self.portfolio = []
    if args.rollout_size > 0:
        self.portfolio = initialize_portfolio(self.portfolio, self.args,
                                              self.genealogy,
                                              args.portfolio_id,
                                              model_constructor)

    # Initialize Rollout Bucket
    self.rollout_bucket = self.manager.list() if self.args.use_mp else []
    for _ in range(len(self.portfolio)):
        self.rollout_bucket.append(
            model_constructor.make_model(self.policy_string))

    if self.args.use_mp:
        ############## MULTIPROCESSING TOOLS ###################
        # Evolutionary population rollout workers
        data_bucket = self.data_bucket if args.rollout_size > 0 else None  # If strictly evo, don't store data
        self.evo_task_pipes = [Pipe() for _ in range(args.pop_size)]
        self.evo_result_pipes = [Pipe() for _ in range(args.pop_size)]
        self.evo_workers = [
            Process(target=rollout_worker,
                    args=(id, 'evo', self.evo_task_pipes[id][1],
                          self.evo_result_pipes[id][0], data_bucket,
                          self.population, env_constructor))
            for id in range(args.pop_size)
        ]
        for worker in self.evo_workers:
            worker.start()

        # Learner rollout workers
        self.task_pipes = [Pipe() for _ in range(args.rollout_size)]
        self.result_pipes = [Pipe() for _ in range(args.rollout_size)]
        self.workers = [
            Process(target=rollout_worker,
                    args=(id, 'pg', self.task_pipes[id][1],
                          self.result_pipes[id][0], data_bucket,
                          self.rollout_bucket, env_constructor))
            for id in range(args.rollout_size)
        ]
        for worker in self.workers:
            worker.start()

    self.roll_flag = [True for _ in range(args.rollout_size)]
    self.evo_flag = [True for _ in range(args.pop_size)]

    # Meta-learning controller (resource distribution)
    self.allocation = []  # Allocation controls the resource allocation across learners
    for i in range(args.rollout_size):
        self.allocation.append(i % len(self.portfolio))  # Start uniformly (equal resources)

    # Trackers
    self.best_score = -float('inf')
    self.gen_frames = 0
    self.total_frames = 0
    self.best_speedup = -float('inf')
    self.champ_type = None
def __init__(self, args):
    self.args = args
    self.evolver = SSNE(self.args)

    # MP TOOLS
    self.manager = Manager()

    # Genealogy tool
    self.genealogy = Genealogy()

    # Initialize population
    self.pop = self.manager.list()
    for _ in range(args.pop_size):
        wwid = self.genealogy.new_id('evo')
        if ALGO == 'SAC':
            self.pop.append(
                GaussianPolicy(args.state_dim, args.action_dim,
                               args.hidden_size, wwid))
        else:
            self.pop.append(Actor(args.state_dim, args.action_dim, wwid))

    if ALGO == "SAC":
        self.best_policy = GaussianPolicy(args.state_dim, args.action_dim,
                                          args.hidden_size, -1)
    else:
        self.best_policy = Actor(args.state_dim, args.action_dim, -1)

    # Turn off gradients and put in eval mode
    for actor in self.pop:
        actor = actor.cpu()
        actor.eval()

    # Init BUFFER
    self.replay_buffer = Buffer(1000000, self.args.buffer_gpu)

    # Initialize portfolio of learners
    self.portfolio = []
    self.portfolio = initialize_portfolio(self.portfolio, self.args,
                                          self.genealogy, PORTFOLIO_ID)
    self.rollout_bucket = self.manager.list()
    for _ in range(len(self.portfolio)):
        if ALGO == 'SAC':
            self.rollout_bucket.append(
                GaussianPolicy(args.state_dim, args.action_dim,
                               args.hidden_size, -1))
        else:
            self.rollout_bucket.append(
                Actor(args.state_dim, args.action_dim, -1))

    # Initialize shared data bucket
    self.data_bucket = self.replay_buffer.tuples

    ############## MULTIPROCESSING TOOLS ###################
    # Evolutionary population rollout workers
    self.evo_task_pipes = [Pipe() for _ in range(args.pop_size)]
    self.evo_result_pipes = [Pipe() for _ in range(args.pop_size)]
    self.evo_workers = [
        Process(target=rollout_worker,
                args=(id, self.evo_task_pipes[id][1],
                      self.evo_result_pipes[id][0], False, self.data_bucket,
                      self.pop, ENV_NAME, None, ALGO))
        for id in range(args.pop_size)
    ]
    for worker in self.evo_workers:
        worker.start()
    self.evo_flag = [True for _ in range(args.pop_size)]

    # Learner rollout workers
    self.task_pipes = [Pipe() for _ in range(args.rollout_size)]
    self.result_pipes = [Pipe() for _ in range(args.rollout_size)]
    self.workers = [
        Process(target=rollout_worker,
                args=(id, self.task_pipes[id][1], self.result_pipes[id][0],
                      True, self.data_bucket, self.rollout_bucket, ENV_NAME,
                      args.noise_std, ALGO))
        for id in range(args.rollout_size)
    ]
    for worker in self.workers:
        worker.start()
    self.roll_flag = [True for _ in range(args.rollout_size)]

    # Test bucket
    self.test_bucket = self.manager.list()
    if ALGO == 'SAC':
        self.test_bucket.append(
            GaussianPolicy(args.state_dim, args.action_dim,
                           args.hidden_size, -1))
    else:
        self.test_bucket.append(Actor(args.state_dim, args.action_dim, -1))

    # Test workers
    self.test_task_pipes = [Pipe() for _ in range(TEST_SIZE)]
    self.test_result_pipes = [Pipe() for _ in range(TEST_SIZE)]
    self.test_workers = [
        Process(target=rollout_worker,
                args=(id, self.test_task_pipes[id][1],
                      self.test_result_pipes[id][0], False, None,
                      self.test_bucket, ENV_NAME, args.noise_std, ALGO))
        for id in range(TEST_SIZE)
    ]
    for worker in self.test_workers:
        worker.start()
    self.test_flag = False

    # Meta-learning controller (resource distribution)
    self.allocation = []  # Allocation controls the resource allocation across learners
    for i in range(args.rollout_size):
        self.allocation.append(i % len(self.portfolio))  # Start uniformly (equal resources)

    # self.learner_stats = [{'fitnesses': [], 'ep_lens': [], 'value': 0.0, 'visit_count': 0} for _ in range(len(self.portfolio))]  # Track node statistics (each node is a learner) to compute UCB scores

    # Trackers
    self.best_score = 0.0
    self.gen_frames = 0
    self.total_frames = 0
    self.best_shaped_score = None
    self.test_score = None
    self.test_std = None
def gen_buffer(size):
    j_buff = org.vertx.java.framework.TestUtils.generateRandomBuffer(size)
    return Buffer(j_buff)
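# A minimal usage sketch: generate a random buffer for a test and wrap it as a
# Python Buffer (the size unit is assumed to be bytes).
buff = gen_buffer(100)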