def put(self, actor_id):
    dbid = Actor.get_dbid(g.tenant, actor_id)
    try:
        actor = Actor.from_db(actors_store[dbid])
    except KeyError:
        raise APIException("actor not found: {}".format(actor_id), 404)
    previous_image = actor.image
    args = self.validate_put(actor)
    args['tenant'] = g.tenant
    update_image = False
    if args['image'] == previous_image:
        args['status'] = actor.status
    else:
        update_image = True
        args['status'] = SUBMITTED
    args['api_server'] = g.api_server
    args['owner'] = g.user
    actor = Actor(**args)
    actors_store[actor.db_id] = actor.to_db()
    if update_image:
        ch = CommandChannel()
        ch.put_cmd(actor_id=actor.db_id, image=actor.image, tenant=args['tenant'])
    # return ok(result={'update_image': str(update_image)},
    #           msg="Actor updated successfully.")
    return ok(result=actor.display(), msg="Actor updated successfully.")
def get(self, actor_id, execution_id):
    def get_hypermedia(actor, exc):
        return {'_links': {'self': '{}/actors/v2/{}/executions/{}/logs'.format(actor.api_server, actor.id, exc.id),
                           'owner': '{}/profiles/v2/{}'.format(actor.api_server, actor.owner),
                           'execution': '{}/actors/v2/{}/executions/{}'.format(actor.api_server, actor.id, exc.id)},
                }
    dbid = Actor.get_dbid(g.tenant, actor_id)
    try:
        actor = Actor.from_db(actors_store[dbid])
    except KeyError:
        raise APIException("actor not found: {}".format(actor_id), 404)
    try:
        excs = executions_store[dbid]
    except KeyError:
        raise APIException("No executions found for actor {}.".format(actor_id))
    try:
        exc = Execution.from_db(excs[execution_id])
    except KeyError:
        raise APIException("Execution not found {}.".format(execution_id))
    try:
        logs = logs_store[execution_id]
    except KeyError:
        logs = ""
    result = {'logs': logs}
    result.update(get_hypermedia(actor, exc))
    return ok(result, msg="Logs retrieved successfully.")
def subscribe(actor_id, worker_ch):
    """
    Main loop for the Actor executor worker. Subscribes to the actor's inbox and executes
    actor containers when messages arrive. Also subscribes to the worker channel for future
    communications.
    :return:
    """
    actor_ch = ActorMsgChannel(actor_id)
    t = threading.Thread(target=process_worker_ch, args=(worker_ch, actor_id, actor_ch))
    t.start()
    print("Worker subscribing to actor channel...")
    while keep_running:
        update_worker_status(actor_id, worker_ch.name, READY)
        try:
            msg = actor_ch.get(timeout=2)
        except channelpy.ChannelTimeoutException:
            continue
        print("Received message {}. Starting actor container...".format(str(msg)))
        message = msg.pop("msg", "")
        try:
            stats, logs = execute_actor(actor_id, worker_ch, image, message, msg)
        except DockerStartContainerError as e:
            print("Got DockerStartContainerError: {}".format(str(e)))
            Actor.set_status(actor_id, ERROR)
            continue
        # add the execution to the actor store
        print("Actor container finished successfully. Got stats object:{}".format(str(stats)))
        exc_id = Execution.add_execution(actor_id, stats)
        Execution.set_logs(exc_id, logs)
def do_save(self):
    for hexsha in self.cached_data:
        val = self.cached_data[hexsha]
        try:
            actor = Actor.objects.get(full_name=val['name'])
        except Actor.DoesNotExist:
            # Create the actor
            actor = Actor(full_name=val['name'])
            actor.save()
        try:
            commit = Commit.objects.get(hexsha=hexsha)
        except Commit.DoesNotExist:
            commit = Commit(hexsha=hexsha, repo=self.repo_model, actor=actor)
            commit.save()
        for path, fun in val['funcs']:
            if not Function.objects.filter(name=fun, path=path).exists():
                fmodel = Function(name=fun, commit=commit, path=path)
                fmodel.save()
                print("Saved `%s` : `%s`" % (path[-16:], fun))
        for file_name in val['files_changed']:
            FileChange(path=file_name, actor=actor, commit=commit).save()
    self.cached_data.clear()
def get(self, actor_id):
    dbid = Actor.get_dbid(g.tenant, actor_id)
    try:
        actor = Actor.from_db(actors_store[dbid])
    except KeyError:
        raise APIException("actor not found: {}. db_id:{}".format(actor_id, dbid), 404)
    return ok(result=actor.display(), msg="Actor retrieved successfully.")
def post(self, actor_id):
    id = Actor.get_dbid(g.tenant, actor_id)
    try:
        actor = Actor.from_db(actors_store[id])
    except KeyError:
        raise APIException("actor not found: {}".format(actor_id), 404)
    args = self.validate_post()
    Execution.add_execution(id, args)
    return ok(result=actor.display(), msg="Actor execution added successfully.")
def get(self, actor_id, ch_name):
    try:
        Actor.from_db(actors_store[actor_id])
    except KeyError:
        raise WorkerException("actor not found: {}".format(actor_id))
    try:
        worker = get_worker(actor_id, ch_name)
    except WorkerException as e:
        raise APIException(e.message, 404)
    return ok(result=worker, msg="Worker retrieved successfully.")
def post(self, actor_id):
    dbid = Actor.get_dbid(g.tenant, actor_id)
    args = self.validate_post()
    state = args['state']
    try:
        actor = Actor.from_db(actors_store[dbid])
    except KeyError:
        raise APIException("actor not found: {}".format(actor_id), 404)
    actors_store.update(dbid, 'state', state)
    return ok(result=actor.display(), msg="State updated successfully.")
def post(self, actor_id): """Add new permissions for an actor""" try: Actor.from_db(actors_store[actor_id]) except KeyError: raise APIException( "actor not found: {}'".format(actor_id), 404) args = self.validate_post() add_permission(args['user'], actor_id, args['level']) permissions = get_permissions(actor_id) return ok(result=permissions, msg="Permission added successfully.")
def get(self, actor_id):
    try:
        Actor.from_db(actors_store[actor_id])
    except KeyError:
        raise APIException("actor not found: {}".format(actor_id), 404)
    try:
        permissions = get_permissions(actor_id)
    except PermissionsException as e:
        raise APIException(e.message, 404)
    return ok(result=permissions, msg="Permissions retrieved successfully.")
def post(self):
    args = self.validate_post()
    args['executions'] = {}
    args['state'] = ''
    args['subscriptions'] = []
    args['status'] = SUBMITTED
    actor = Actor(args)
    actors_store[actor.id] = actor.to_db()
    ch = CommandChannel()
    ch.put_cmd(actor_id=actor.id, image=actor.image)
    return ok(result=actor, msg="Actor created successfully.")
def post(self):
    args = self.validate_post()
    args['tenant'] = g.tenant
    args['api_server'] = g.api_server
    args['owner'] = g.user
    actor = Actor(**args)
    actors_store[actor.db_id] = actor.to_db()
    ch = CommandChannel()
    ch.put_cmd(actor_id=actor.db_id, image=actor.image, tenant=args['tenant'])
    add_permission(g.user, actor.db_id, 'UPDATE')
    return ok(result=actor.display(), msg="Actor created successfully.", request=request)
def delete(self, actor_id):
    id = Actor.get_dbid(g.tenant, actor_id)
    shutdown_workers(id)
    try:
        actor = Actor.from_db(actors_store[id])
        executions = actor.get('executions') or {}
        for ex_id, val in executions.items():
            del logs_store[ex_id]
    except KeyError:
        print("Did not find actor with id: {}".format(id))
    del actors_store[id]
    del permissions_store[id]
    return ok(result=None, msg='Actor deleted successfully.')
def post(self, actor_id):
    def get_hypermedia(actor, exc):
        return {'_links': {'self': '{}/actors/v2/{}/executions/{}'.format(actor.api_server, actor.id, exc),
                           'owner': '{}/profiles/v2/{}'.format(actor.api_server, actor.owner),
                           'messages': '{}/actors/v2/{}/messages'.format(actor.api_server, actor.id)},
                }
    args = self.validate_post()
    d = {}
    # build a dictionary of k:v pairs from the query parameters, and pass a single
    # additional object 'message' from within the post payload. Note that 'message'
    # need not be JSON data.
    for k, v in request.args.items():
        if k == 'message':
            continue
        d[k] = v
    if hasattr(g, 'user'):
        d['_abaco_username'] = g.user
    if hasattr(g, 'api_server'):
        d['_abaco_api_server'] = g.api_server
    # if hasattr(g, 'jwt'):
    #     d['_abaco_jwt'] = g.jwt
    # if hasattr(g, 'jwt_server'):
    #     d['_abaco_jwt_server'] = g.jwt_server
    if hasattr(g, 'jwt_header_name'):
        d['_abaco_jwt_header_name'] = g.jwt_header_name
    dbid = Actor.get_dbid(g.tenant, actor_id)
    # create an execution
    exc = Execution.add_execution(dbid, {'cpu': 0,
                                         'io': 0,
                                         'runtime': 0,
                                         'status': SUBMITTED,
                                         'executor': g.user})
    d['_abaco_execution_id'] = exc
    d['_abaco_Content-Type'] = args.get('_abaco_Content-Type', '')
    ch = ActorMsgChannel(actor_id=dbid)
    ch.put_msg(message=args['message'], d=d)
    # make sure at least one worker is available
    workers = Worker.get_workers(dbid)
    actor = Actor.from_db(actors_store[dbid])
    if len(workers.items()) < 1:
        ch = CommandChannel()
        ch.put_cmd(actor_id=dbid, image=actor.image, tenant=g.tenant, num=1, stop_existing=False)
    result = {'execution_id': exc, 'msg': args['message']}
    result.update(get_hypermedia(actor, exc))
    case = Config.get('web', 'case')
    if not case == 'camel':
        return ok(result)
    else:
        return ok(dict_to_camel(result))
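# For context, a minimal client-side sketch of calling this messages endpoint with the
# `requests` library. The base URL, auth header, and actor id below are hypothetical
# placeholders for illustration, not values taken from the handler above.
import requests

base_url = "https://example.org/actors/v2"     # hypothetical deployment URL
headers = {"Authorization": "Bearer <token>"}  # hypothetical auth scheme

# Query parameters end up in the context dict d; 'message' is read from the POST body.
resp = requests.post("{}/{}/messages".format(base_url, "my-actor"),
                     headers=headers,
                     data={"message": "hello actor"},
                     params={"greeting": "hi"})
print(resp.json())  # expected to include the execution id and the '_links' hypermedia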
def post(self, actor_id): """Start new workers for an actor""" id = Actor.get_dbid(g.tenant, actor_id) try: actor = Actor.from_db(actors_store[id]) except KeyError: raise APIException( "actor not found: {}'".format(actor_id), 404) args = self.validate_post() num = args.get('num') if not num or num == 0: num = 1 ch = CommandChannel() ch.put_cmd(actor_id=actor.db_id, image=actor.image, tenant=g.tenant, num=num, stop_existing=False) return ok(result=None, msg="Scheduled {} new worker(s) to start.".format(str(num)))
def get(self, actor_id):
    dbid = Actor.get_dbid(g.tenant, actor_id)
    try:
        Actor.from_db(actors_store[dbid])
    except KeyError:
        raise APIException("actor not found: {}".format(actor_id), 400)
    try:
        workers = Worker.get_workers(dbid)
    except WorkerException as e:
        raise APIException(e.msg, 404)
    result = []
    for id, worker in workers.items():
        worker.update({'id': id})
        result.append(worker)
    return ok(result=result, msg="Workers retrieved successfully.")
def test_serialize_unicode(self):
    """Tests that unicode makes the roundtrip intact"""
    actor_name = u"Za\u017c\u00f3\u0142\u0107"
    movie_title = u'G\u0119\u015bl\u0105 ja\u017a\u0144'
    ac = Actor(name=actor_name)
    mv = Movie(title=movie_title, actor=ac)
    ac.save()
    mv.save()
    serial_str = serializers.serialize(self.serializer_name, [mv])
    self.assertEqual(self._get_field_values(serial_str, "title")[0], movie_title)
    self.assertEqual(self._get_field_values(serial_str, "actor")[0], actor_name)
    obj_list = list(serializers.deserialize(self.serializer_name, serial_str))
    mv_obj = obj_list[0].object
    self.assertEqual(mv_obj.title, movie_title)
def get(self, actor_id):
    dbid = Actor.get_dbid(g.tenant, actor_id)
    try:
        summary = ExecutionsSummary(db_id=dbid)
    except DAOError as e:
        raise APIException("actor not found: {}. DAOError: {}".format(actor_id, e), 404)
    return ok(result=summary.display(), msg="Actor executions retrieved successfully.")
def get(self, actor_id):
    try:
        actor = Actor.from_db(actors_store[actor_id])
    except KeyError:
        raise APIException("actor not found: {}".format(actor_id), 404)
    return ok(result=actor, msg="Actor retrieved successfully.")
def get(self, actor_id):
    def get_hypermedia(actor):
        return {'_links': {'self': '{}/actors/v2/{}/messages'.format(actor.api_server, actor.id),
                           'owner': '{}/profiles/v2/{}'.format(actor.api_server, actor.owner),
                           },
                }
    # check that actor exists
    id = Actor.get_dbid(g.tenant, actor_id)
    try:
        actor = Actor.from_db(actors_store[id])
    except KeyError:
        raise APIException("actor not found: {}".format(actor_id), 404)
    result = {'messages': len(ActorMsgChannel(actor_id=id)._queue._queue)}
    result.update(get_hypermedia(actor))
    return ok(result)
def get(self, actor_id):
    try:
        actor = Actor.from_db(actors_store[actor_id])
        subscriptions = actor.get('subscriptions') or {'subscriptions': None}
    except KeyError:
        raise APIException("actor not found: {}".format(actor_id), 404)
    return ok(result=subscriptions, msg="Subscriptions retrieved successfully.")
def validate_put(self, actor):
    # inherit derived attributes from the original actor, including id and db_id:
    parser = Actor.request_parser()
    # remove since name is only required for POST, not PUT
    parser.remove_argument('name')
    # this update overrides all required and optional attributes
    actor.update(parser.parse_args())
    return actor
def delete(self, actor_id, ch_name):
    id = Actor.get_dbid(g.tenant, actor_id)
    try:
        worker = Worker.get_worker(id, ch_name)
    except WorkerException as e:
        raise APIException(e.msg, 404)
    shutdown_worker(ch_name)
    return ok(result=None, msg="Worker scheduled to be stopped.")
def main(worker_ch_name, image):
    worker_ch = WorkerChannel(name=worker_ch_name)
    # first, attempt to pull image from docker hub:
    try:
        print("Worker pulling image {}...".format(image))
        pull_image(image)
    except DockerError as e:
        # return a message to the spawner that there was an error pulling image and abort
        worker_ch.put({'status': 'error', 'msg': str(e)})
        raise e
    # inform spawner that image pulled successfully
    print("Image pulled successfully")
    # wait to receive message from spawner that it is time to subscribe to the actor channel
    print("Worker waiting on message from spawner...")
    result = worker_ch.put_sync({'status': 'ok'})
    if result['status'] == 'error':
        print("Worker received error message from spawner: {}. Quitting...".format(str(result)))
        raise WorkerException(str(result))
    actor_id = result.get('actor_id')
    tenant = result.get('tenant')
    print("Worker received ok from spawner. Message: {}, actor_id:{}".format(result, actor_id))
    api_server = None
    client_id = None
    client_secret = None
    access_token = None
    refresh_token = None
    if result.get('client') == 'yes':
        api_server = result.get('api_server')
        client_id = result.get('client_id')
        client_secret = result.get('client_secret')
        access_token = result.get('access_token')
        refresh_token = result.get('refresh_token')
    else:
        print("Did not get client:yes, got client:{}".format(result.get('client')))
    Actor.set_status(actor_id, READY)
    subscribe(tenant, actor_id, api_server, client_id, client_secret, access_token, refresh_token, worker_ch)
def manage_workers(actor_id):
    """Scale workers for an actor based on message queue size and policy."""
    print("Entering manage_workers for {}".format(actor_id))
    try:
        actor = Actor.from_db(actors_store[actor_id])
    except KeyError:
        print("Did not find actor; returning.")
        return
    workers = Worker.get_workers(actor_id)
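# The function above is truncated after fetching the workers. A hypothetical sketch of the
# kind of scaling decision it could make; the helper name, the max_workers cap, and the
# queue-length check are illustration-only assumptions, not part of the original code.
def scale_decision(actor_id, workers, max_workers=5):
    # Compare queued messages to the number of live workers and decide whether to add one.
    queued = len(ActorMsgChannel(actor_id=actor_id)._queue._queue)
    if queued > len(workers) and len(workers) < max_workers:
        return 'scale_up'
    return 'no_change'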
def get(self, actor_id):
    # check that actor exists
    try:
        actor = Actor.from_db(actors_store[actor_id])
    except KeyError:
        raise APIException("actor not found: {}".format(actor_id), 404)
    # TODO
    # retrieve pending messages from the queue
    return ok(result={'messages': []})
def handle(socket, address):
    fileobj = socket.makefile('rw')
    while not Actor.by_socket(socket).disconnected:
        line = fileobj.readline()
        if not line:
            Actor.by_socket(socket).flush()
            Actor.by_socket(socket).disconnect()
            continue
        try:
            msg = Message.from_string(line)
            log.debug('<= %s %s' % (repr(msg.target), repr(msg)))
            resp = dispatcher.dispatch(socket, msg)
        except Exception as e:
            log.exception(e)
            actor = Actor.by_socket(socket)
            if actor.is_user() and actor.get_user().registered.nick and actor.get_user().registered.user:
                resp = [
                    Message(actor, 'NOTICE', 'The message your client has just sent could not be parsed or processed.'),
                    Message(actor, 'NOTICE', 'If this is a problem with the server, please open an issue at:'),
                    Message(actor, 'NOTICE', 'https://github.com/abesto/python-ircd'),
                    Message(actor, 'NOTICE', '---'),
                    Message(actor, 'NOTICE', 'The message sent by your client was:'),
                    Message(actor, 'NOTICE', line.strip("\n")),
                    Message(actor, 'NOTICE', 'The error was:'),
                    Message(actor, 'NOTICE', str(e)),
                    Message(actor, 'NOTICE', '---'),
                    Message(actor, 'NOTICE', 'Closing connection.')
                ]
                quit_resp = dispatcher.dispatch(socket, Message(None, 'QUIT', 'Protocol error'))
                if isinstance(quit_resp, list):
                    resp += quit_resp
                else:
                    resp.append(quit_resp)
            else:
                resp = Message(actor, 'ERROR')
            Actor.by_socket(socket).disconnect()
        try:
            router.send(resp)
        except Exception as e:
            log.exception(e)
            Actor.by_socket(socket).disconnect()
def post(self, actor_id):
    args = self.validate_post()
    state = args['state']
    try:
        actor = Actor.from_db(actors_store[actor_id])
    except KeyError:
        raise APIException("actor not found: {}".format(actor_id), 404)
    actor.state = state
    actors_store[actor_id] = actor.to_db()
    return ok(result=actor, msg="State updated successfully.")
def check_new_params(self, cmd):
    valid, msg = self.check_common(cmd)
    # validate the actor_id
    try:
        actor = Actor.from_db(actors_store[cmd.get('actor_id')])
    except KeyError:
        return False, "Unable to look up actor with id: {}".format(cmd.get('actor_id')), None
    # validate the worker id
    try:
        Worker.get_worker(actor_id=cmd.get('actor_id'), ch_name=cmd.get('worker_id'))
    except WorkerException as e:
        return False, "Unable to look up worker: {}".format(e.msg), None
    return valid, msg, actor.owner
def put(self, actor_id):
    try:
        actor = Actor.from_db(actors_store[actor_id])
    except KeyError:
        raise APIException("actor not found: {}".format(actor_id), 404)
    args = self.validate_put()
    update_image = False
    args['name'] = actor['name']
    args['id'] = actor['id']
    args['executions'] = actor['executions']
    args['state'] = actor['state']
    if args['image'] == actor.image:
        args['status'] = actor.status
    else:
        update_image = True
        args['status'] = SUBMITTED
    actor = Actor(args)
    actors_store[actor.id] = actor.to_db()
    if update_image:
        ch = CommandChannel()
        ch.put_cmd(actor_id=actor.id, image=actor.image)
    return ok(result=actor, msg="Actor updated successfully.")
class TD3(object): def __init__(self, state_dim, action_dim, max_action, memory, args): # actor self.actor = Actor(state_dim, action_dim, max_action, layer_norm=args.layer_norm) self.actor_target = Actor(state_dim, action_dim, max_action, layer_norm=args.layer_norm) self.actor_target.load_state_dict(self.actor.state_dict()) self.actor_optimizer = torch.optim.Adam(self.actor.parameters(), lr=args.actor_lr) # critic self.critic = CriticTD3(state_dim, action_dim, layer_norm=args.layer_norm) self.critic_target = CriticTD3(state_dim, action_dim, layer_norm=args.layer_norm) self.critic_target.load_state_dict(self.critic.state_dict()) self.critic_optimizer = torch.optim.Adam(self.critic.parameters(), lr=args.critic_lr) # cuda if torch.cuda.is_available(): self.actor = self.actor.cuda() self.actor_target = self.actor_target.cuda() self.critic = self.critic.cuda() self.critic_target = self.critic_target.cuda() # misc self.criterion = nn.MSELoss() self.state_dim = state_dim self.action_dim = action_dim self.max_action = max_action self.memory = memory # hyper-parameters self.tau = args.tau self.discount = args.discount self.batch_size = args.batch_size self.policy_noise = args.policy_noise self.noise_clip = args.noise_clip self.policy_freq = args.policy_freq def select_action(self, state, noise=None): state = FloatTensor(state.reshape(-1, self.state_dim)) action = self.actor(state).cpu().data.numpy().flatten() if noise is not None: action += noise.sample() return np.clip(action, -self.max_action, self.max_action) def train(self, iterations): for it in tqdm(range(iterations)): # Sample replay buffer x, y, u, r, d = self.memory.sample(self.batch_size) state = FloatTensor(x) next_state = FloatTensor(y) action = FloatTensor(u) reward = FloatTensor(r) done = FloatTensor(1 - d) # Select action according to policy and add clipped noise noise = np.clip( np.random.normal(0, self.policy_noise, size=(self.batch_size, self.action_dim)), -self.noise_clip, self.noise_clip) next_action = self.actor_target(next_state) + FloatTensor(noise) next_action = next_action.clamp(-self.max_action, self.max_action) # Q target = reward + discount * min_i(Qi(next_state, pi(next_state))) with torch.no_grad(): target_Q1, target_Q2 = self.critic_target( next_state, next_action) target_Q = torch.min(target_Q1, target_Q2) target_Q = reward + (done * self.discount * target_Q) # Get current Q estimates current_Q1, current_Q2 = self.critic(state, action) # Compute critic loss critic_loss = self.criterion( current_Q1, target_Q) + self.criterion(current_Q2, target_Q) # Optimize the critic self.critic_optimizer.zero_grad() critic_loss.backward() self.critic_optimizer.step() # Delayed policy updates if it % self.policy_freq == 0: # Compute actor loss Q1, Q2 = self.critic(state, self.actor(state)) actor_loss = -Q1.mean() # Optimize the actor self.actor_optimizer.zero_grad() actor_loss.backward() self.actor_optimizer.step() # Update the frozen target models for param, target_param in zip( self.critic.parameters(), self.critic_target.parameters()): target_param.data.copy_(self.tau * param.data + (1 - self.tau) * target_param.data) for param, target_param in zip(self.actor.parameters(), self.actor_target.parameters()): target_param.data.copy_(self.tau * param.data + (1 - self.tau) * target_param.data) def load(self, filename): self.actor.load_model(filename, "actor") self.critic.load_model(filename, "critic") def save(self, output): self.actor.save_model(output, "actor") self.critic.save_model(output, "critic")
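# A minimal usage sketch for the TD3 class above. The gym environment name, the
# ReplayMemory class, and the hyperparameter values are assumptions for illustration;
# only the attribute names read in __init__ (layer_norm, actor_lr, critic_lr, tau,
# discount, batch_size, policy_noise, noise_clip, policy_freq) come from the code above.
import argparse
import gym

args = argparse.Namespace(layer_norm=True, actor_lr=1e-3, critic_lr=1e-3,
                          tau=0.005, discount=0.99, batch_size=100,
                          policy_noise=0.2, noise_clip=0.5, policy_freq=2)
env = gym.make("Pendulum-v0")             # hypothetical environment choice
state_dim = env.observation_space.shape[0]
action_dim = env.action_space.shape[0]
max_action = float(env.action_space.high[0])
memory = ReplayMemory(int(1e6))           # assumed buffer exposing sample(batch_size) -> (x, y, u, r, d)
agent = TD3(state_dim, action_dim, max_action, memory, args)
# ... fill `memory` by stepping the environment with agent.select_action(state) ...
agent.train(iterations=1000)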
def main(): env = gym.make('InvertedPendulum-v2') # states: [x, theta, x', theta'] # action: [horizontal force] nstates = 4 nactions = 1 T = 2048 # environement steps per update batch_size = 64 epochs = 10 lr = 0.01 discount = 0.99 clipping_epsilon = 0.2 lam = 0.95 # GAE parameter total_timesteps = 1000000 actor = Actor(nstates, nactions) critic = Critic(nstates) n_updates = total_timesteps // T if total_timesteps % T != 0: n_updates += 1 n_batches_per_update = T // batch_size if T % batch_size != 0: n_batches_per_update += 1 episode_rewards = [] critic_losses = [] for update in tqdm(range(n_updates)): states, actions, rewards, dones, values, log_probs, ep_rewards = rollout( env, actor, critic, T, nstates, max_ep_length) episode_rewards += ep_rewards advantages, returns = get_advantages_and_returns( dones, rewards, values, discount, lam, T) idx = np.arange(T) for k in range(epochs): np.random.default_rng().shuffle(idx) for n in range(0, n_batches_per_update, batch_size): batch_idx = idx[n:n + batch_size] state = states[batch_idx] action = actions[batch_idx] log_prob = log_probs[batch_idx] advantage = advantages[batch_idx] G = returns[batch_idx] _, current_log_probs = actor.forward(batch_states, batch_actions, requires_grad=True) ratios = np.exp(current_log_probs - batch_log_probs) clipped_ratios = np.minimum( 1 + clipping_epsilon, np.maximum(1 - clipping_epsilon, ratios)) unclipped_surrogate = ratios * batch_A clipped_surrogate = clipped_ratios * batch_A actor_loss = -np.minimum(unclipped_surrogate, clipped_surrogate).mean() current_state_values = critic.forward(batch_states, requires_grad=True) critic_loss = ((current_state_values - batch_returns)**2).mean() # derivative of actor_loss w.r.t current_log_probs dAL_dlp = -unclipped_surrogate # derivative of clipped_ratios w.r.t ratios dcr_dr = np.zeros_like(ratios) dcr_dr[(ratios < 1 + clipping_epsilon) & (ratios > 1 - clipping_epsilon)] = 1.0 # only include the derivative of the clipped_ratio if the clipped_ratio was used clipped_used_idx = clipped_surrogate < unclipped_surrogate dAL_dlp[clipped_used_idx] *= dcr_dr[clipped_used_idx] # derivative of critic_loss w.r.t current_state_values dCL_dsv = current_state_values - batch_returns actor.backward(dAL_dlp) critic.backward(dCL_dsv) actor.optimization_step(lr) critic.optimization_step(lr) actor_losses.append(actor_loss) critic_losses.append(critic_loss) env.close() fig, ax = plt.subplots() ax.plot(moving_average(episode_rewards, 100)) plt.show() plt.close() fig, ax = plt.subplots() ax.plot(moving_average(critic_losses, 10)) plt.show() plt.close()
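# For reference, the clipped surrogate objective that the manual-gradient loop above
# approximates (standard PPO notation, stated here as background rather than taken from
# the snippet):
#
#   L^{CLIP}(\theta) = \mathbb{E}_t\big[\min\big(r_t(\theta)\,\hat{A}_t,\;
#                      \mathrm{clip}(r_t(\theta),\,1-\epsilon,\,1+\epsilon)\,\hat{A}_t\big)\big],
#   \quad r_t(\theta) = \exp\big(\log\pi_\theta(a_t\mid s_t) - \log\pi_{\theta_{\mathrm{old}}}(a_t\mid s_t)\big)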
class DDPG: """Implementation of DDPG. This implementation is adapted to this particular environment running several agent. At each time step, the same actor is controlling each agent sequentially. """ def __init__(self, state_size, action_size, config): """Initialize algorithm.""" if config.PER: self.memory = PrioritizeReplayBuffer( config.BUFFER_SIZE, config.BATCH_SIZE, config.SEED ) else: self.memory = ReplayBuffer( config.BUFFER_SIZE, config.BATCH_SIZE, config.SEED ) # Randomly initialize critic netowrk and actor self.actor = Actor(state_size, action_size, config.SEED).to(device) self.critic = Critic(state_size, action_size, config.SEED).to(device) # Initialize target networks with weights from actor critic # Actor self.actor_target = Actor(state_size, action_size, config.SEED).to(device) self.actor_target.load_state_dict(self.actor.state_dict()) # Critic self.critic_target = Critic(state_size, action_size, config.SEED).to(device) self.critic_target.load_state_dict(self.critic.state_dict()) # Actor optimizer self.actor_optimizer = torch.optim.Adam( self.actor.parameters(), lr=config.LR_ACTOR ) # Critic optimizer self.critic_optimizer = torch.optim.Adam( self.critic.parameters(), lr=config.LR_CRITIC ) self.config = config self.t_step = 0 self.expl_noise = config.EXPL_NOISE def step(self, target_sample=None, **kwargs): """Run a step of algorithm update.""" # Sample a random minibatch of transitions states, actions, rewards, next_states, dones = self._draw_minibatch() # Compute the target Q value target_Q = self.critic_target( next_states, self.actor_target(next_states) ).detach() y = rewards + (1 - dones) * self.config.GAMMA * target_Q # Update critic by minimizing the loss current_Q = self.critic(states, actions) # Compute TD error td_error = y - current_Q if self.config.PER: # Get importance_sampling_weights weights = torch.Tensor(self.memory.importance_sampling()).unsqueeze(1) # Update priorities self.memory.update_priorities(td_error.detach().cpu().numpy()) # Compute critic loss critic_loss = torch.mean(weights * td_error ** 2) else: # Compute critic loss critic_loss = torch.mean(td_error ** 2) # Optimize critic self.critic_optimizer.zero_grad() critic_loss.backward() # Clip gradient nn.utils.clip_grad_norm_(self.critic.parameters(), 1) self.critic_optimizer.step() # Update the actor policy using the sampled policy gradient: actor_loss = -self.critic(states, self.actor(states)).mean() self.actor_optimizer.zero_grad() actor_loss.backward() # CLip gradient nn.utils.clip_grad_norm_(self.actor.parameters(), 1) self.actor_optimizer.step() # Update target networks self.soft_update() def train(self, env, num_episode): """Train a DDPG agent.""" scores = [] scores_window = deque(maxlen=100) for episode in range(num_episode): # Init state and episode score states = env.reset(train_mode=True) score = np.zeros(states.shape[0]) done = False # Run episode while not done: # Select and run action actions = self.predict_actions(states) # TODO: dynamic low and high selection actions = self.add_gaussian_noise(actions, -1, 1) next_states, rewards, dones = env.step(actions) # Store all n_agent episodes in replay buffer for state, action, reward, next_state, done in zip( states, actions, rewards, next_states, dones ): self.memory.add(state, action, reward, next_state, done) # Update time step self.t_step = (self.t_step + 1) % self.config.UPDATE_EVERY # Optimisation step if UPDATE_EVERY and enough examples in memory if self.t_step == 0 and len(self.memory) > self.config.BATCH_SIZE: for _ in 
range(self.config.UPDATE_STEPS): self.step() # Update state and scores states = next_states score += rewards # End episode if any of the agent is done, to avoid storing too much # Done transitions in the replay buffer done = any(dones) # Keep track of running mean scores_window.append(max(score)) # Append current mean to scores list scores.append(np.mean(scores_window)) # Logging print( "\rEpisode {}\tAverage Score: {:.2f}, Last Score: {:.2f}".format( episode, np.mean(scores_window), max(score) ), end="", ) if (episode + 1) % 100 == 0: print( "\rEpisode {}\tAverage Score: {:.2f}".format( episode, np.mean(scores_window) ) ) return scores def soft_update(self): """Update the frozen target models.""" tau = self.config.TAU # Actor for param, target_param in zip( self.critic.parameters(), self.critic_target.parameters() ): target_param.data.copy_(tau * param.data + (1 - tau) * target_param.data) # Critic for param, target_param in zip( self.actor.parameters(), self.actor_target.parameters() ): target_param.data.copy_(tau * param.data + (1 - tau) * target_param.data) def predict_actions(self, states, **kwargs): """Predict next actions based on current policy.""" states = torch.from_numpy(states).float().unsqueeze(0).to(device) # Set actor to eval mode self.actor.eval() actions = [] with torch.no_grad(): for state in states: action = self.actor(state) actions.append(action.detach().numpy()) # Set actor to train mode self.actor.train() return np.array(actions).squeeze() def add_gaussian_noise(self, action, low, high): """Add Gaussian noise to action, and clip between low and high.""" return (action + np.random.normal(0, self.expl_noise, size=action.shape)).clip( low, high ) def _draw_minibatch(self): """Draw a minibatch in the replay buffer.""" states, actions, rewards, next_states, done = zip(*self.memory.sample()) states = torch.Tensor(states).to(device) actions = torch.Tensor(actions).to(device) rewards = torch.Tensor(rewards).unsqueeze(1).to(device) next_states = torch.Tensor(next_states).to(device) done = torch.Tensor(done).unsqueeze(1).to(device) return states, actions, rewards, next_states, done def save_model(self, path, **kwargs): """Save actor model weights.""" torch.save(self.actor.state_dict(), path)
def test_401_drop_actor_unsuccessful(self):
    actor = Actor('ali', 30, 'M')
    actor.insert()
    res = self.client().delete('/actors/' + str(actor.id),
                               headers=settingup_auth(''))
    self.assertEqual(res.status_code, 401)
def __init__(self, env, nS, nA, config): self.seed = config.seed self.name = config.name self.nA = nA self.nS = nS self.num_agents = config.num_agents self.episodes = config.episodes self.tmax = config.tmax self.print_every = config.print_every self.update_every = config.UPDATE_EVERY self.SGD_epoch = config.SGD_epoch self.actor_path = config.actor_path self.critic_path = config.critic_path self.noise = GaussianNoise((self.num_agents, nA), config.episodes) # self.noise = OUnoise(nA,config.seed) self.winning_condition = config.winning_condition self.device = torch.device( "cuda" if torch.cuda.is_available() else "cpu") # Hyperparams self.gamma = config.gamma self.buffer_size = config.buffer_size self.min_buffer_size = config.min_buffer_size self.batch_size = config.batch_size self.L2 = config.L2 self.tau = config.TAU # For multi agent self.nO = self.num_agents * nS # Observation space self.env = env self.R = ReplayBuffer(config.buffer_size, config.batch_size, config.seed) # Instantiating Actor and Critic self.base_actor = Actor(self.seed, self.nS, self.nA) self.base_critic = Critic(self.seed, self.nO, self.nA) # Instantiate the desired number of agents and envs self.local_critics = [ Critic(self.seed, self.nO, self.nA) for agent in range(self.num_agents) ] self.local_actors = [ Actor(self.seed, self.nS, self.nA) for agent in range(self.num_agents) ] self.target_critics = [ Critic(self.seed, self.nO, self.nA) for agent in range(self.num_agents) ] self.target_actors = [ Actor(self.seed, self.nS, self.nA) for agent in range(self.num_agents) ] # Copy the weights from base agents to target and local map(lambda x: hard_update(self.base_critic, x), self.local_critics) map(lambda x: hard_update(self.base_critic, x), self.target_critics) map(lambda x: hard_update(self.base_actor, x), self.local_actors) map(lambda x: hard_update(self.base_actor, x), self.target_actors) # Instantiate optimizers self.critic_optimizers = [ optim.Adam(self.local_critics[i].parameters(), lr=1e-3, weight_decay=self.L2) for i in range(self.num_agents) ] self.actor_optimizers = [ optim.Adam(self.local_actors[i].parameters(), lr=1e-4) for i in range(self.num_agents) ]
from collections import deque
import random

import torch
from torch import optim
from tqdm import tqdm

from env import Env
from models import Actor, Critic, create_target_network, update_target_network
from utils import plot

max_steps, update_start, update_interval, batch_size, discount, policy_delay, polyak_rate = 100000, 10000, 4, 128, 0.99, 2, 0.995

env = Env()
actor = Actor()
critic_1 = Critic(state_action=True)
critic_2 = Critic(state_action=True)
target_actor = create_target_network(actor)
target_critic_1 = create_target_network(critic_1)
target_critic_2 = create_target_network(critic_2)
actor_optimiser = optim.Adam(actor.parameters(), lr=1e-3)
critics_optimiser = optim.Adam(list(critic_1.parameters()) + list(critic_2.parameters()), lr=1e-3)
D = deque(maxlen=10000)

state, done, total_reward = env.reset(), False, 0
pbar = tqdm(range(1, max_steps + 1), unit_scale=1, smoothing=0)
for step in pbar:
    with torch.no_grad():
        if step < update_start:
            # To improve exploration take actions sampled from a uniform random distribution over actions at the start of training
            action = torch.tensor([[2 * random.random() - 1]])
        else:
class Agent(object): ''' Implementation of a DQN agent that interacts with and learns from the environment ''' def __init__(self, state_size, action_size, rand_seed, meta_agent): '''Initialize an MetaAgent object. :param state_size: int. dimension of each state :param action_size: int. dimension of each action :param nb_agents: int. number of agents to use :param rand_seed: int. random seed :param memory: ReplayBuffer object. ''' self.action_size = action_size self.__name__ = 'DDPG' # Actor Network (w/ Target Network) self.actor_local = Actor(state_size, action_size, rand_seed).to(DEVC) self.actor_target = Actor(state_size, action_size, rand_seed).to(DEVC) self.actor_optimizer = optim.Adam(self.actor_local.parameters(), lr=LR_ACTOR) # Critic Network (w/ Target Network) self.critic_local = Critic(state_size, action_size, meta_agent.nb_agents, rand_seed).to(DEVC) self.critic_target = Critic(state_size, action_size, meta_agent.nb_agents, rand_seed).to(DEVC) # NOTE: the decay corresponds to L2 regularization self.critic_optimizer = optim.Adam( self.critic_local.parameters(), lr=LR_CRITIC) # , weight_decay=WEIGHT_DECAY) # Noise process self.noise = OUNoise(action_size, rand_seed) # Replay memory self.memory = meta_agent.memory # Initialize time step (for updating every UPDATE_EVERY steps) self.t_step = 0 def step(self, state, action, reward, next_state, done, others_states, others_actions, others_next_states): self.memory.add(state, action, reward, next_state, done, others_states, others_actions, others_next_states) # Learn every UPDATE_EVERY time steps. self.t_step = (self.t_step + 1) % UPDATE_EVERY if self.t_step == 0: # If enough samples are available in memory, get random subset and learn if len(self.memory) > BATCH_SIZE: # source: Sample a random minibatch of N transitions from R experiences = self.memory.sample() self.learn(experiences, GAMMA) def act(self, states, add_noise=True): '''Returns actions for given states as per current policy. :param states: array_like. current states :param add_noise: Boolean. If should add noise to the action ''' states = torch.from_numpy(states).float().to(DEVC) self.actor_local.eval() with torch.no_grad(): actions = self.actor_local(states).cpu().data.numpy() self.actor_local.train() if add_noise: actions += self.noise.sample() return np.clip(actions, -1, 1) def reset(self): self.noise.reset() def learn(self, experiences, gamma): ''' Update policy and value params using given batch of experience tuples. Q_targets = r + ? * critic_target(next_state, actor_target(next_state)) where: actor_target(state) -> action critic_target(state, action) -> Q-value :param experiences: Tuple[torch.Tensor]. tuple of (s, a, r, s', done) :param gamma: float. discount factor ''' (states, actions, rewards, next_states, dones, others_states, others_actions, others_next_states) = experiences # rewards_ = torch.clamp(rewards, min=-1., max=1.) 
rewards_ = rewards all_states = torch.cat((states, others_states), dim=1).to(DEVC) all_actions = torch.cat((actions, others_actions), dim=1).to(DEVC) all_next_states = torch.cat((next_states, others_next_states), dim=1).to(DEVC) # --------------------------- update critic --------------------------- # Get predicted next-state actions and Q values from target models l_all_next_actions = [] l_all_next_actions.append(self.actor_target(states)) l_all_next_actions.append(self.actor_target(others_states)) all_next_actions = torch.cat(l_all_next_actions, dim=1).to(DEVC) Q_targets_next = self.critic_target(all_next_states, all_next_actions) # Compute Q targets for current states (y_i) Q_targets = rewards_ + (gamma * Q_targets_next * (1 - dones)) # Compute critic loss: L = 1/N SUM{(yi ? Q(si, ai|?Q))^2} Q_expected = self.critic_local(all_states, all_actions) critic_loss = F.mse_loss(Q_expected, Q_targets) # Minimize the loss self.critic_optimizer.zero_grad() critic_loss.backward() self.critic_optimizer.step() # --------------------------- update actor --------------------------- # Compute actor loss this_actions_pred = self.actor_local(states) others_actions_pred = self.actor_local(others_states) others_actions_pred = others_actions_pred.detach() actions_pred = torch.cat((this_actions_pred, others_actions_pred), dim=1).to(DEVC) actor_loss = -self.critic_local(all_states, actions_pred).mean() # Minimize the loss self.actor_optimizer.zero_grad() actor_loss.backward() self.actor_optimizer.step() # ---------------------- update target networks ---------------------- # Update the critic target networks # Update the actor target networks self.soft_update(self.critic_local, self.critic_target, TAU) self.soft_update(self.actor_local, self.actor_target, TAU) def soft_update(self, local_model, target_model, tau): '''Soft update model parameters. ?_target = ?*?_local + (1 - ?)*?_target :param local_model: PyTorch model. weights will be copied from :param target_model: PyTorch model. weights will be copied to :param tau: float. interpolation parameter ''' iter_params = zip(target_model.parameters(), local_model.parameters()) for target_param, local_param in iter_params: tensor_aux = tau * local_param.data + (1.0 - tau) * target_param.data target_param.data.copy_(tensor_aux) def reset(self): self.noise.reset()
class TD3: def __init__(self, env, state_dim, action_dim, max_action, gamma=0.99, tau=0.005, policy_noise=0.2, noise_clip=0.5, policy_freq=2): self.actor = Actor(state_dim, action_dim) self.actor_target = Actor(state_dim, action_dim) self.actor_target.load_state_dict(self.actor.state_dict()) self.actor_optimizer = optim.Adam(self.actor.parameters(), lr=1e-3) self.critic = Critic(state_dim, action_dim) self.critic_target = Critic(state_dim, action_dim) self.critic_target.load_state_dict(self.critic.state_dict()) self.critic_optimizer = optim.Adam(self.critic.parameters(), lr=1e-3) self.max_action = max_action self.gamma = gamma self.tau = tau self.policy_noise = policy_noise self.noise_clip = noise_clip self.policy_freq = policy_freq self.device = 'cuda' if torch.cuda.is_available() else 'cpu' self.actor.to(self.device) self.actor_target.to(self.device) self.critic.to(self.device) self.critic_target.to(self.device) self.env = env self.total_it = 0 def select_action(self, state, noise=0.1): action = self.actor(state.to(self.device)).data.cpu().numpy().flatten() if noise != 0: action = (action + np.random.normal( 0, noise, size=self.env.action_space.shape[0])) return action.clip(self.env.action_space.low, self.env.action_space.high) def train(self, replay_buffer, batch_size=128): self.total_it += 1 states, states_, actions, rewards, terminal = replay_buffer.sample_buffer( batch_size) with torch.no_grad(): noise = (torch.randn_like(actions.to(self.device)) * self.policy_noise).clamp(-self.noise_clip, self.noise_clip) next_action = (self.actor_target(states_.to(self.device)) + noise).clamp(-self.max_action, self.max_action) # compute the target Q value target_q1, target_q2 = self.critic_target( states_.to(self.device), next_action.to(self.device)) target_q = torch.min(target_q1, target_q2) # target_q = rewards + terminal * self.gamma + target_q.cpu() # target_q = rewards + (terminal.reshape(256, 1) * self.gamma * target_q).detach() target_q = rewards + terminal * self.gamma * target_q[:, 0].cpu() # Get current Q value current_q1, current_q2 = self.critic(states.to(self.device), actions.to(self.device)) # Compute critic loss critic_loss = F.mse_loss(current_q1[:, 0], target_q.to( self.device)) + F.mse_loss(current_q2[:, 0], target_q.to(self.device)) # optimize the critic self.critic_optimizer.zero_grad() critic_loss.backward() self.critic_optimizer.step() # Delayed policy updates if self.total_it % self.policy_freq == 0: # Compote actor loss actor_loss = -self.critic.q1(states.to( self.device), self.actor(states.to(self.device))).mean() # Optimize the actor self.actor_optimizer.zero_grad() actor_loss.backward() self.actor_optimizer.step() # Update the frozen target models for param, target_param in zip(self.critic.parameters(), self.critic_target.parameters()): target_param.data.copy_(self.tau * param.data + (1 - self.tau) * target_param.data) for param, target_param in zip(self.actor.parameters(), self.actor_target.parameters()): target_param.data.copy_(self.tau * param.data + (1 - self.tau) * target_param.data) def save(self, filename): torch.save(self.critic.state_dict(), filename + "_critic") torch.save(self.critic_optimizer.state_dict(), filename + "_critic_optimizer") torch.save(self.actor.state_dict(), filename + "_actor") torch.save(self.actor_optimizer.state_dict(), filename + "_actor_optimizer") def load(self, filename): self.critic.load_state_dict(torch.load(filename + "_critic")) self.critic_optimizer.load_state_dict( torch.load(filename + "_critic_optimizer")) 
self.actor.load_state_dict(torch.load(filename + "_actor")) self.actor_optimizer.load_state_dict( torch.load(filename + "_actor_optimizer"))
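# For reference, the standard TD3 target that train() above computes (written in the usual
# notation; how the terminal flag is encoded in `terminal` follows the snippet's replay
# buffer and is not restated here):
#
#   a' = \mathrm{clip}\big(\mu_{\theta'}(s') + \mathrm{clip}(\epsilon, -c, c),\, -a_{\max},\, a_{\max}\big),
#   \quad \epsilon \sim \mathcal{N}(0, \sigma)
#
#   y = r + \gamma\,(1 - d)\,\min_{i=1,2} Q_{\phi'_i}(s', a')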
def test_get_actors_filtered_by_two_id(self):
    actor_one = Actor(**AppTestCase.test_actor)
    actor_one.insert()
    actor_one_id = actor_one.id
    actor_two = Actor(**AppTestCase.test_actor)
    actor_two.insert()
    actor_two_id = actor_two.id
    actor_id_list = [actor_one_id, actor_two_id]
    res = self.client().get(f'/actors?id={actor_one_id}&id={actor_two_id}')
    data = json.loads(res.data)
    self.assertEqual(res.status_code, 200)
    self.assertTrue(data['success'])
    self.assertTrue(data['page'] == 1)
    self.assertTrue(data['total_actors'] == 2)
    self.assertTrue(data['actors'][0]['id'] in actor_id_list)
    self.assertTrue(data['actors'][1]['id'] in actor_id_list)
    actor_one.delete()
    actor_two.delete()
class Agent(): """Interacts with and learns from the environment.""" def __init__(self, state_size, action_size, random_seed): """Initialize an Agent object. Params ====== state_size (int): dimension of each state action_size (int): dimension of each action random_seed (int): random seed """ self.state_size = state_size self.action_size = action_size self.seed = random.seed(random_seed) self.epsilon = EPSILON ### DEFINE THE ACTOR NETWORK ### ### INFINITE STEP BOOTSRAPPING, THEREFORE HIGH VARIANCE ### self.actor_local = Actor(state_size, action_size, random_seed).to(device) self.actor_target = Actor(state_size, action_size, random_seed).to(device) self.actor_optimizer = optim.Adam(self.actor_local.parameters(), lr=LR_ACTOR) ### DEFINE THE CRITIC NETWORK ### ### ONE STEP BOOTSRAPPING, THEREFORE HIGH BIAS ### self.critic_local = Critic(state_size, action_size, random_seed).to(device) self.critic_target = Critic(state_size, action_size, random_seed).to(device) self.critic_optimizer = optim.Adam(self.critic_local.parameters(), lr=LR_CRITIC, weight_decay=WEIGHT_DECAY) ### PROCCESS TO CREATE NOISE ### self.noise = OUNoise(action_size, random_seed) # Replay memory self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, random_seed) def step(self, state, action, reward, next_state, done, timestep): """Save experience in replay memory, and use random sample from buffer to learn.""" # Save experience / reward self.memory.add(state, action, reward, next_state, done) # Learn at defined interval, if enough samples are available in memory if len(self.memory) > BATCH_SIZE and timestep % LEARN_EVERY == 0: for _ in range(LEARN_NUM): experiences = self.memory.sample() self.learn(experiences, GAMMA) def act(self, state, add_noise=True): """Returns actions for given state as per current policy.""" state = torch.from_numpy(state).float().to(device) self.actor_local.eval() with torch.no_grad(): action = self.actor_local(state).cpu().data.numpy() self.actor_local.train() if add_noise: action += self.epsilon * self.noise.sample() return np.clip(action, -1, 1) def reset(self): self.noise.reset() def learn(self, experiences, gamma): """Update policy and value parameters using given batch of experience tuples. 
Q_targets = r + γ * critic_target(next_state, actor_target(next_state)) where: actor_target(state) -> action critic_target(state, action) -> Q-value Params ====== experiences (Tuple[torch.Tensor]): tuple of (s, a, r, s', done) tuples gamma (float): discount factor """ states, actions, rewards, next_states, dones = experiences # ---------------------------- update critic ---------------------------- # # Get predicted next-state actions and Q values from target models actions_next = self.actor_target(next_states) Q_targets_next = self.critic_target(next_states, actions_next) # Compute Q targets for current states (y_i) Q_targets = rewards + (gamma * Q_targets_next * (1 - dones)) # Compute critic loss Q_expected = self.critic_local(states, actions) critic_loss = F.mse_loss(Q_expected, Q_targets) # Minimize the loss self.critic_optimizer.zero_grad() critic_loss.backward() torch.nn.utils.clip_grad_norm_(self.critic_local.parameters(), 1) self.critic_optimizer.step() # ---------------------------- update actor ---------------------------- # # Compute actor loss actions_pred = self.actor_local(states) actor_loss = -self.critic_local(states, actions_pred).mean() # Minimize the loss self.actor_optimizer.zero_grad() actor_loss.backward() self.actor_optimizer.step() # ----------------------- update target networks ----------------------- # self.soft_update(self.critic_local, self.critic_target, TAU) self.soft_update(self.actor_local, self.actor_target, TAU) # ---------------------------- update noise ---------------------------- # self.epsilon -= EPSILON_DECAY self.noise.reset() def soft_update(self, local_model, target_model, tau): """Soft update model parameters. θ_target = τ*θ_local + (1 - τ)*θ_target Params ====== local_model: PyTorch model (weights will be copied from) target_model: PyTorch model (weights will be copied to) tau (float): interpolation parameter """ for target_param, local_param in zip(target_model.parameters(), local_model.parameters()): target_param.data.copy_(tau * local_param.data + (1.0 - tau) * target_param.data)
class PPO(BaseAgent): def __init__(self, config): super(PPO, self).__init__() self.config = config torch.manual_seed(self.config['seed']) np.random.seed(self.config['seed']) if self.config['experiment'][ 'orthogonal_initialization_and_layer_scaling']: weight_init_scheme = 'orthogonal' else: weight_init_scheme = 'normal' self.actor = Actor( device=self.config['device'], input_dim=self.config['env']['nS'], output_dim=self.config['env']['nA'], hidden_dims=self.config['model']['actor']['hidden_dims'], hidden_activation_fn=self.config['model']['actor'] ['hidden_acivation_fn'], weight_init_scheme=weight_init_scheme) self.actor_optimizer = optim.Adam( self.actor.parameters(), lr=self.config['model']['actor']['lr'], betas=self.config['model']['actor']['betas']) self.critic = Critic( device=self.config['device'], input_dim=self.config['env']['nS'], hidden_dims=self.config['model']['critic']['hidden_dims'], hidden_activation_fn=self.config['model']['critic'] ['hidden_acivation_fn'], weight_init_scheme=weight_init_scheme) self.critic_optimizer = optim.Adam( self.critic.parameters(), lr=self.config['model']['critic']['lr'], betas=self.config['model']['critic']['betas']) if self.config['train']['gail']: self.discriminator = Discriminator( device=self.config['device'], state_dim=self.config['env']['nS'], action_dim=self.config['env']['nA'], hidden_dims=self.config['model']['discriminator'] ['hidden_dims'], hidden_activation_fn=self.config['model']['discriminator'] ['hidden_acivation_fn'], weight_init_scheme=weight_init_scheme) self.discriminator_optimizer = optim.Adam( self.discriminator.parameters(), lr=self.config['model']['discriminator']['lr'], betas=self.config['model']['discriminator']['betas']) # [EXPERIMENT] - reward scaler: r / rs.std() if self.config['experiment']['reward_standardization']: self.reward_scaler = RewardScaler( gamma=self.config['train']['gamma']) # [EXPERIMENT] - observation scaler: (ob - ob.mean()) / (ob.std()) if self.config['experiment']['observation_normalization']: self.observation_scaler = ObservationScaler() # train def train(self): """ # initialize env, memory # foreach episode # foreach timestep # select action # step action # add exp to the memory # if done or timeout or memory_full: update gae & tdlamret # if memory is full # bootstrap value # optimize # clear memory # if done: # wrapup episode # break """ writer_path = os.path.join('experiments', self.config['exp_name'], 'runs') self.writer = SummaryWriter(writer_path) # Pretrain with BC if self.config['train']['bc']: bc_train_set, bc_valid_set = get_bc_dataset( self.config['train']['bc']['samples_exp_name'], self.config['train']['bc']['minimum_score'], self.config['train']['bc']['batch_size'], self.config['train']['bc']['demo_count'], self.config['train']['bc']['val_size']) if self.config['experiment']['observation_normalization']: use_obs_scaler = True else: use_obs_scaler = False self.actor = pretrain(self.actor, self.config['train']['bc']['lr'], self.config['train']['bc']['epochs'], bc_train_set, bc_valid_set, use_obs_scaler, writer=self.writer) # GAIL if self.config['train']['gail']: self.expert_dataset = get_gail_dataset( self.config['train']['gail']['samples_exp_name'], self.config['train']['gail']['minimum_score'], self.config['train']['gail']['n_samples'], self.config['train']['ppo']['memory_size'], self.config['train']['gail']['dstep']) self.best_score = 0 # prepare env, memory, stuff env = self.init_env(self.config['env']['name']) env.seed(self.config['seed']) self.memory = 
PPOMemory(gamma=self.config['train']['gamma'], tau=self.config['train']['gae']['tau']) score_queue = deque(maxlen=self.config['train']['average_interval']) length_queue = deque(maxlen=self.config['train']['average_interval']) if self.config['train']['gail']: irl_score_queue = deque( maxlen=self.config['train']['average_interval']) for episode in trange(1, self.config['train']['max_episodes'] + 1): self.episode = episode episode_score = 0 if self.config['train']['gail']: irl_episode_score = 0 # reset env state = env.reset() for t in range(1, self.config['train']['max_steps_per_episode'] + 1): if self.episode % 100 == 0: env.render() # [EXPERIMENT] - observation scaler: (ob - ob.mean()) / (ob.std()) if self.config['experiment']['observation_normalization']: state = self.observation_scaler(state, update=True) # select action & estimate value from the state with torch.no_grad(): state_tensor = torch.tensor(state).unsqueeze( 0).float() # bsz = 1 action_tensor, logpa_tensor = self.actor.select_action( state_tensor) value_tensor = self.critic(state_tensor).squeeze( 1) # don't need bsz dim # step action action = action_tensor.numpy()[0] # single worker next_state, reward, done, _ = env.step(action) # update episode_score episode_score += reward # GAIL: get irl_reward if self.config['train']['gail']: with torch.no_grad(): reward = self.discriminator.get_irl_reward( state_tensor, action_tensor).detach() irl_episode_score += reward # [EXPERIMENT] - reward scaler r / rs.std() if self.config['experiment']['reward_standardization']: reward = self.reward_scaler(reward, update=True) # [EXPERIMENT] - reward clipping [-5, 5] if self.config['experiment']['reward_clipping']: reward = np.clip(reward, -5, 5) # add experience to the memory self.memory.store(s=state, a=action, r=reward, v=value_tensor.item(), lp=logpa_tensor.item()) # done or timeout or memory full # done => v = 0 # timeout or memory full => v = critic(next_state) # update gae & return in the memory!! 
timeout = t == self.config['train']['max_steps_per_episode'] time_to_optimize = len( self.memory) == self.config['train']['ppo']['memory_size'] if done or timeout or time_to_optimize: if done: # cuz the game is over, value of the next state is 0 v = 0 else: # if not, estimate it with the critic next_state_tensor = torch.tensor(next_state).unsqueeze( 0).float() # bsz = 1 with torch.no_grad(): next_value_tensor = self.critic( next_state_tensor).squeeze(1) v = next_value_tensor.item() # update gae & tdlamret self.memory.finish_path(v) # if memory is full, optimize PPO if time_to_optimize: self.optimize() if done: score_queue.append(episode_score) length_queue.append(t) if self.config['train']['gail']: irl_score_queue.append(irl_episode_score) break # update state state = next_state avg_score = np.mean(score_queue) std_score = np.std(score_queue) avg_duration = np.mean(length_queue) self.writer.add_scalar("info/score", avg_score, self.episode) self.writer.add_scalar("info/duration", avg_duration, self.episode) if self.config['train']['gail']: avg_score = np.mean(irl_score_queue) self.writer.add_scalar("info/irl_score", avg_score, self.episode) if self.episode % 100 == 0: print("{} - score: {:.1f} +-{:.1f} \t duration: {}".format( self.episode, avg_score, std_score, avg_duration)) # game-solved condition # if avg_score >= self.config['train']['terminal_score']: # print("game solved at ep {}".format(self.episode)) # self.save_weight(self.actor, self.config['exp_name'], "best") # break if avg_score >= self.best_score and self.episode >= 200: print("found best model at episode: {}".format(self.episode)) self.save_weight(self.actor, self.config['exp_name'], "best") self.best_score = avg_score # [EXPERIMENT] - observation scaler: (ob - ob.mean()) / (ob.std()) if self.config['experiment']['observation_normalization']: self.observation_scaler.save(self.config['exp_name']) self.save_weight(self.actor, self.config['exp_name'], "last") return self.best_score # optimize def optimize(self): data = self.prepare_data(self.memory.get()) # gail if self.config['train']['gail']: self.optimize_gail(data) self.optimize_ppo(data) def prepare_data(self, data): states_tensor = torch.from_numpy(np.stack( data['states'])).float() # bsz, 8 actions_tensor = torch.tensor(data['actions']).long() # bsz logpas_tensor = torch.tensor(data['logpas']).float() # bsz tdlamret_tensor = torch.tensor(data['tdlamret']).float() # bsz advants_tensor = torch.tensor(data['advants']).float() # bsz values_tensor = torch.tensor(data['values']).float() # bsz # normalize advant a.k.a atarg advants_tensor = (advants_tensor - advants_tensor.mean()) / ( advants_tensor.std() + 1e-5) data_tensor = dict(states=states_tensor, actions=actions_tensor, logpas=logpas_tensor, tdlamret=tdlamret_tensor, advants=advants_tensor, values=values_tensor) return data_tensor def ppo_iter(self, batch_size, ob, ac, oldpas, atarg, tdlamret, vpredbefore): total_size = ob.size(0) indices = np.arange(total_size) np.random.shuffle(indices) n_batches = total_size // batch_size for nb in range(n_batches): ind = indices[batch_size * nb:batch_size * (nb + 1)] yield ob[ind], ac[ind], oldpas[ind], atarg[ind], tdlamret[ ind], vpredbefore[ind] def optimize_gail(self, data): """ https://github.com/openai/baselines/blob/master/baselines/gail/trpo_mpi.py bsz = learner_batch_size // d_step for each ob_batch, ac_batch in learner_dataset: get ob_expert, ac_expert from expert_dataset get learner_logit from D get expert_logit from D get learner loss vs. torch.ones() get expert loss vs. 
torch.zeros() update D """ loss_fn = nn.BCELoss() D_losses = [] learner_accuracies = [] expert_accuracies = [] learner_ob = data['states'] learner_ac = data['actions'] rub = torch.zeros_like( learner_ob) # not doing anything.. just wanted to reuse ppo_iter() learner_iter = self.ppo_iter(self.expert_dataset.batch_size, learner_ob, learner_ac, rub, rub, rub, rub) for learner_ob_b, learner_ac_b, _, _, _, _ in learner_iter: expert_ob_b, expert_ac_b = self.expert_dataset.get_next_batch() if self.config['experiment']['observation_normalization']: expert_ob_b = self.observation_scaler(expert_ob_b, update=False).float() learner_logit = self.discriminator.forward(learner_ob_b, learner_ac_b) learner_prob = torch.sigmoid(learner_logit) expert_logit = self.discriminator.forward(expert_ob_b, expert_ac_b) expert_prob = torch.sigmoid(expert_logit) learner_loss = loss_fn(learner_prob, torch.ones_like(learner_prob)) expert_loss = loss_fn(expert_prob, torch.zeros_like(expert_prob)) loss = learner_loss + expert_loss D_losses.append(loss.item()) self.discriminator_optimizer.zero_grad() loss.backward() self.discriminator_optimizer.step() learner_acc = ((learner_prob >= 0.5).float().mean().item()) expert_acc = ((expert_prob < 0.5).float().mean().item()) learner_accuracies.append(learner_acc) expert_accuracies.append(expert_acc) avg_d_loss = np.mean(D_losses) avg_learner_accuracy = np.mean(learner_accuracies) avg_expert_accuracy = np.mean(expert_accuracies) self.writer.add_scalar("info/discrim_loss", avg_d_loss, self.episode) self.writer.add_scalars("info/gail_accuracy", { 'learner': avg_learner_accuracy, 'expert': avg_expert_accuracy }, self.episode) def optimize_ppo(self, data): """ https://github.com/openai/baselines/blob/master/baselines/ppo1/pposgd_simple.py line 164 # get data from the memory # prepare dataloader # foreach optim_epochs # foreach batch # calculate loss and gradient # update nn """ ob = data['states'] ac = data['actions'] oldpas = data['logpas'] atarg = data['advants'] tdlamret = data['tdlamret'] vpredbefore = data['values'] # can't be arsed.. eps = self.config['train']['ppo']['clip_range'] policy_losses = [] entropy_losses = [] value_losses = [] # foreach policy_update_epochs for i in range(self.config['train']['ppo']['optim_epochs']): # foreach batch data_loader = self.ppo_iter( self.config['train']['ppo']['batch_size'], ob, ac, oldpas, atarg, tdlamret, vpredbefore) for batch in data_loader: ob_b, ac_b, old_logpas_b, atarg_b, vtarg_b, old_vpred_b = batch # policy loss cur_logpas, cur_entropies = self.actor.get_predictions( ob_b, ac_b) ratio = torch.exp(cur_logpas - old_logpas_b) # clip ratio clipped_ratio = torch.clamp(ratio, 1. - eps, 1. 
+ eps) # policy_loss surr1 = ratio * atarg_b if self.config['experiment']['policy_noclip']: pol_surr = -surr1.mean() else: surr2 = clipped_ratio * atarg_b pol_surr = -torch.min(surr1, surr2).mean() # value_loss cur_vpred = self.critic(ob_b).squeeze(1) # [EXPERIMENT] - value clipping: clipped_value = old_values + (curr_values - old_values).clip(-eps, +eps) if self.config['experiment']['value_clipping']: cur_vpred_clipped = old_vpred_b + ( cur_vpred - old_vpred_b).clamp(-eps, eps) vloss1 = (cur_vpred - vtarg_b).pow(2) vloss2 = (cur_vpred_clipped - vtarg_b).pow(2) vf_loss = torch.max(vloss1, vloss2).mean() else: # original value_loss vf_loss = (cur_vpred - vtarg_b).pow(2).mean() # entropy_loss pol_entpen = -cur_entropies.mean() # total loss c1 = self.config['train']['ppo']['coef_vf'] c2 = self.config['train']['ppo']['coef_entpen'] # actor - backward self.actor_optimizer.zero_grad() policy_loss = pol_surr + c2 * pol_entpen policy_loss.backward() # [EXPERIMENT] - clipping gradient with max_norm=0.5 if self.config['experiment']['clipping_gradient']: nn.utils.clip_grad_norm_(self.actor.parameters(), max_norm=0.5) self.actor_optimizer.step() # critic - backward self.critic_optimizer.zero_grad() value_loss = c1 * vf_loss value_loss.backward() # [EXPERIMENT] - clipping gradient with max_norm=0.5 if self.config['experiment']['clipping_gradient']: nn.utils.clip_grad_norm_(self.critic.parameters(), max_norm=0.5) self.critic_optimizer.step() policy_losses.append(pol_surr.item()) entropy_losses.append(pol_entpen.item()) value_losses.append(vf_loss.item()) avg_policy_loss = np.mean(policy_losses) avg_value_losses = np.mean(value_losses) avg_entropy_losses = np.mean(entropy_losses) self.writer.add_scalar("info/policy_loss", avg_policy_loss, self.episode) self.writer.add_scalar("info/value_loss", avg_value_losses, self.episode) self.writer.add_scalar("info/entropy_loss", avg_entropy_losses, self.episode) # play def play(self, num_episodes=1, save_traj=False, seed=9999, record=False, save_result=False): # [EXPERIMENT] - observation scaler: (ob - ob.mean()) / (ob.std()) if self.config['experiment']['observation_normalization']: self.observation_scaler.load(self.config['exp_name']) # load policy self.load_weight(self.actor, self.config['exp_name']) env = self.init_env(self.config['env']['name']) env.seed(seed) if record: from gym import wrappers rec_dir = os.path.join("experiments", self.config['exp_name'], "seed_{}".format(seed)) env = wrappers.Monitor(env, rec_dir, force=True) scores, trajectories = [], [] for episode in range(num_episodes): current_trajectory = [] episode_score = 0 # initialize env state = env.reset() while True: # env.render() # [EXPERIMENT] - observation scaler: (ob - ob.mean()) / (ob.std()) if self.config['experiment']['observation_normalization']: state = self.observation_scaler(state, update=False) # select greedy action with torch.no_grad(): action_tensor = self.actor.select_greedy_action(state) action = action_tensor.numpy()[0] # single env current_trajectory.append((state, action)) # run action next_state, reward, done, _ = env.step(action) # add reward episode_score += reward # update state state = next_state # game over condition if done: scores.append(episode_score) trajectories.append((current_trajectory, episode_score)) break avg_score = np.mean(scores) print("Average score {} on {} games".format(avg_score, num_episodes)) if save_result: played_result_path = os.path.join("experiments", self.config['exp_name'], "runs", "play_score.pth") torch.save(scores, played_result_path) if 
save_traj: demo_dir = os.path.join("experiments", self.config['exp_name'], "demonstration") os.makedirs(demo_dir) torch.save(trajectories, os.path.join(demo_dir, "demo.pth")) print("saved {} trajectories.".format(num_episodes)) env.close()
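# Illustrative sketch (not part of the original trainer): the heart of optimize_ppo()
# above is the PPO clipped surrogate objective. The standalone function below
# re-implements only that loss for a single batch so the clipping logic is easy to
# inspect; the tensor names mirror the trainer (logpas = log pi(a|s), advantages = atarg),
# but the helper itself is hypothetical.
import torch


def ppo_clipped_loss(cur_logpas, old_logpas, advantages, clip_eps=0.2):
    """Return the (negated) clipped surrogate objective to be minimized."""
    # probability ratio r_t = pi_theta(a|s) / pi_theta_old(a|s)
    ratio = torch.exp(cur_logpas - old_logpas)
    surr1 = ratio * advantages
    # clip the ratio to [1 - eps, 1 + eps] and take the elementwise minimum
    surr2 = torch.clamp(ratio, 1.0 - clip_eps, 1.0 + clip_eps) * advantages
    return -torch.min(surr1, surr2).mean()


if __name__ == "__main__":
    # toy check with random data
    cur = torch.randn(8)
    old = cur + 0.1 * torch.randn(8)
    adv = torch.randn(8)
    print(ppo_clipped_loss(cur, old, adv).item())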
def __init__(self, params): """Initialize an Agent object.""" self.params = params self.update_target_every = params['update_target_every'] self.update_every = params['update_every'] self.actor_update_every_multiplier = params[ 'actor_update_every_multiplier'] self.update_intensity = params['update_intensity'] self.gamma = params['gamma'] self.action_size = params['actor_params']['action_size'] self.num_agents = params['num_agents'] self.num_atoms = params['critic_params']['num_atoms'] self.v_min = params['critic_params']['v_min'] self.v_max = params['critic_params']['v_max'] self.update_target_type = params['update_target_type'] self.device = params['device'] self.name = params['name'] self.lr_reduction_factor = params['lr_reduction_factor'] self.tau = params['tau'] self.d4pg = params['d4pg'] # Distributes the number of atoms across the range of v min and max self.atoms = torch.linspace(self.v_min, self.v_max, self.num_atoms).to(self.device) # Initialize time step count self.t_step = 0 # Active and Target Actor networks self.actor_active = Actor(params['actor_params']).to(device) self.actor_target = Actor(params['actor_params']).to(device) if self.d4pg: # Active and Target D4PG Critic networks self.critic_active = D4PGCritic(params['critic_params']).to(device) self.critic_target = D4PGCritic(params['critic_params']).to(device) else: # Active and Target Critic networks self.critic_active = Critic(params['critic_params']).to(device) self.critic_target = Critic(params['critic_params']).to(device) self.actor_optimizer = optim.Adam(self.actor_active.parameters(), lr=params['actor_params']['lr']) self.critic_optimizer = optim.Adam(self.critic_active.parameters(), lr=params['critic_params']['lr']) self.schedule_lr = params['schedule_lr'] self.lr_steps = 0 # Create learning rate schedulers if required to reduce the learning rate # depeninding on plateuing of scores if self.schedule_lr: self.actor_scheduler = ReduceLROnPlateau( self.actor_optimizer, mode='max', factor=params['lr_reduction_factor'], patience=params['lr_patience_factor'], verbose=False, ) self.critic_scheduler = ReduceLROnPlateau( self.critic_optimizer, mode='max', factor=params['lr_reduction_factor'], patience=params['lr_patience_factor'], verbose=False, ) print("\n################ ACTOR ################\n") print(self.actor_active) print("\n################ CRITIC ################\n") print(self.critic_active) # Initiate exploration parameters by adding noise to the actions self.noise = params['noise'] # Replay memory self.memory = params['experience_replay']
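# Illustrative sketch: the constructor above is driven entirely by a nested params
# dict. The skeleton below shows one plausible shape for that dict with placeholder
# values; the concrete Actor/D4PGCritic keyword arguments, the noise helper and the
# replay buffer are whatever the surrounding project provides, so they are left as
# None placeholders here.
import torch

example_params = {
    'name': 'd4pg_agent',
    'device': torch.device('cuda' if torch.cuda.is_available() else 'cpu'),
    'd4pg': True,                      # False -> plain DDPG critic
    'num_agents': 20,
    'gamma': 0.99,
    'tau': 1e-3,
    'update_target_type': 'soft',
    'update_target_every': 350,
    'update_every': 1,
    'actor_update_every_multiplier': 1,
    'update_intensity': 1,
    'schedule_lr': False,
    'lr_reduction_factor': 0.5,
    'lr_patience_factor': 10,
    'actor_params': {'action_size': 4, 'lr': 1e-4},    # plus network sizes, seed, ...
    'critic_params': {'num_atoms': 51, 'v_min': -1.0, 'v_max': 1.0, 'lr': 1e-3},
    'noise': None,                     # e.g. an OU / Gaussian exploration-noise helper
    'experience_replay': None,         # e.g. a (prioritized) replay buffer instance
}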
class D4PGAgent(Agent): """An advance D4PG agent with an option to run on a simpler DDPG mode. The agent uses a distributional value estimation when running on D4PG vs the traditional single value estimation when running on DDPG mode.""" def __init__(self, params): """Initialize an Agent object.""" self.params = params self.update_target_every = params['update_target_every'] self.update_every = params['update_every'] self.actor_update_every_multiplier = params[ 'actor_update_every_multiplier'] self.update_intensity = params['update_intensity'] self.gamma = params['gamma'] self.action_size = params['actor_params']['action_size'] self.num_agents = params['num_agents'] self.num_atoms = params['critic_params']['num_atoms'] self.v_min = params['critic_params']['v_min'] self.v_max = params['critic_params']['v_max'] self.update_target_type = params['update_target_type'] self.device = params['device'] self.name = params['name'] self.lr_reduction_factor = params['lr_reduction_factor'] self.tau = params['tau'] self.d4pg = params['d4pg'] # Distributes the number of atoms across the range of v min and max self.atoms = torch.linspace(self.v_min, self.v_max, self.num_atoms).to(self.device) # Initialize time step count self.t_step = 0 # Active and Target Actor networks self.actor_active = Actor(params['actor_params']).to(device) self.actor_target = Actor(params['actor_params']).to(device) if self.d4pg: # Active and Target D4PG Critic networks self.critic_active = D4PGCritic(params['critic_params']).to(device) self.critic_target = D4PGCritic(params['critic_params']).to(device) else: # Active and Target Critic networks self.critic_active = Critic(params['critic_params']).to(device) self.critic_target = Critic(params['critic_params']).to(device) self.actor_optimizer = optim.Adam(self.actor_active.parameters(), lr=params['actor_params']['lr']) self.critic_optimizer = optim.Adam(self.critic_active.parameters(), lr=params['critic_params']['lr']) self.schedule_lr = params['schedule_lr'] self.lr_steps = 0 # Create learning rate schedulers if required to reduce the learning rate # depeninding on plateuing of scores if self.schedule_lr: self.actor_scheduler = ReduceLROnPlateau( self.actor_optimizer, mode='max', factor=params['lr_reduction_factor'], patience=params['lr_patience_factor'], verbose=False, ) self.critic_scheduler = ReduceLROnPlateau( self.critic_optimizer, mode='max', factor=params['lr_reduction_factor'], patience=params['lr_patience_factor'], verbose=False, ) print("\n################ ACTOR ################\n") print(self.actor_active) print("\n################ CRITIC ################\n") print(self.critic_active) # Initiate exploration parameters by adding noise to the actions self.noise = params['noise'] # Replay memory self.memory = params['experience_replay'] def act(self, states, add_noise=True, pretrain=False): """Returns actions for given state as per current policy.""" # If pretraining is active, the agent gives a random action thereby encouraging # intial exploration of the state space quickly if pretrain: actions = np.random.uniform(-1., 1., (self.num_agents, self.action_size)) else: with torch.no_grad(): actions = self.actor_active( states.to(device).float()).detach().to('cpu').numpy() if add_noise: noise = self.noise.create_noise(actions.shape) actions += noise actions = np.clip(actions, -1., 1.) 
return actions, self.noise.epsilon def step(self, states, actions, rewards, next_states, dones, pretrain=False): """Save experience in replay memory, and use random sample from buffer to learn.""" self.memory.add((states, actions, rewards, next_states, dones)) self.t_step += 1 if pretrain == False: return self.learn_() return None, None def learn_(self): "Learns from experience using a distributional value estimation when in D4PG mode" actor_loss = None critic_loss = None # If enough samples are available in memory and its time to learn, then learn! if self.memory.ready() and self.t_step % self.update_every == 0: # Learns multiple times with the same set of experience for _ in range(self.update_intensity): # Samples from the replay buffer which has calculated the n step returns in advance # Next state represents the state at the n'th step states, next_states, actions, rewards, dones = self.memory.sample( ) if self.d4pg: atoms = self.atoms.unsqueeze(0) # Calculate log probability distribution using Zw with regards to stored actions log_probs = self.critic_active(states, actions, log=True) # Calculate the projected log probabilities from the target actor and critic networks # Since back propogation is not required. Tensors are detach to increase speed target_dist = self._get_targets(rewards, next_states).detach() # The critic loss is calculated using a weighted distribution instead of the mean to # arrive at a more accurate result. Cross Entropy loss is used as it is considered to # be the most ideal for categorical value distributions as utlized in the D4PG critic_loss = -(target_dist * log_probs).sum(-1).mean() else: # Get predicted next-state actions and Q values from target models actions_next = self.actor_target(next_states) Q_targets_next = self.critic_target( next_states, actions_next).detach() # Compute Q targets for current states (y_i) Q_targets = rewards + (self.gamma * Q_targets_next * (1 - dones)) # Compute critic loss Q_expected = self.critic_active(states, actions) critic_loss = F.mse_loss(Q_expected, Q_targets) # Execute gradient descent for the critic self.critic_optimizer.zero_grad() critic_loss.backward() torch.nn.utils.clip_grad_norm_(self.critic_active.parameters(), 1) self.critic_optimizer.step() critic_loss = critic_loss.item() # Update actor every x multiples of critic if self.t_step % (self.actor_update_every_multiplier * self.update_every) == 0: if self.d4pg: # Predicts the action for the actor networks loss calculation predicted_action = self.actor_active(states) # Predict the value distribution using the critic with regards to action predicted by actor probs = self.critic_active(states, predicted_action) # Multiply probabilities by atom values and sum across columns to get Q values expected_reward = (probs * atoms).sum(-1) # Calculate the actor network loss (Policy Gradient) # Get the negative of the mean across the expected rewards to do gradient ascent actor_loss = -expected_reward.mean() else: actions_pred = self.actor_active(states) actor_loss = -self.critic_active(states, actions_pred).mean() # Execute gradient ascent for the actor self.actor_optimizer.zero_grad() actor_loss.backward() self.actor_optimizer.step() actor_loss = actor_loss.item() # Updates the target networks every n steps if self.t_step % self.update_target_every == 0: self._update_target_networks() # Returns the actor and critic losses to store on tensorboard return actor_loss, critic_loss def _get_targets(self, rewards, next_states): """ Calculate Yᵢ from target networks using the target 
actor and distributed critic networks """ target_actions = self.actor_target(next_states) target_probs = self.critic_target(next_states, target_actions) # Project the categorical distribution projected_probs = self._get_value_distribution(rewards, target_probs) return projected_probs def _get_value_distribution(self, rewards, probs): """ Returns the projected value distribution for the input state/action pair """ delta_z = (self.v_max - self.v_min) / (self.num_atoms - 1) # The stored rewards already contain the n-step discounted return, so shift each atom # by the reward and discount the atom support by gamma**rollout_length projected_atoms = rewards.unsqueeze( -1 ) + self.gamma**self.memory.rollout_length * self.atoms.unsqueeze(0) projected_atoms.clamp_(self.v_min, self.v_max) b = (projected_atoms - self.v_min) / delta_z # Floating point imprecision can produce values such as 99.000000001, whose ceil() # would round to 100 instead of 99 and index past the last atom. Rounding b to one # decimal place before taking floor()/ceil() avoids that edge case without otherwise # changing the lower and upper bounds precision = 1 b = torch.round(b * 10**precision) / 10**precision lower_bound = b.floor() upper_bound = b.ceil() m_lower = (upper_bound + (lower_bound == upper_bound).float() - b) * probs m_upper = (b - lower_bound) * probs projected_probs = torch.tensor(np.zeros(probs.size())).to(self.device) for idx in range(probs.size(0)): projected_probs[idx].index_add_(0, lower_bound[idx].long(), m_lower[idx].double()) projected_probs[idx].index_add_(0, upper_bound[idx].long(), m_upper[idx].double()) return projected_probs.float()
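# Illustrative sketch: _get_value_distribution() above performs the categorical
# projection used by C51/D4PG. The standalone function below does the same
# projection for a batch of reward vectors and target probabilities, with explicit
# shapes, so the lower/upper-bound bookkeeping is easier to follow. It is a
# re-derivation for clarity, not the class method itself.
import torch


def project_distribution(rewards, probs, atoms, gamma_n, v_min, v_max):
    """rewards: (B,), probs: (B, N) target distribution, atoms: (N,)."""
    num_atoms = atoms.size(0)
    delta_z = (v_max - v_min) / (num_atoms - 1)
    # Bellman-shift every atom by the (n-step discounted) reward, then clamp to the support.
    tz = (rewards.unsqueeze(-1) + gamma_n * atoms.unsqueeze(0)).clamp(v_min, v_max)
    b = (tz - v_min) / delta_z                      # fractional atom index, shape (B, N)
    lower, upper = b.floor(), b.ceil()
    # split each probability mass between its two neighbouring atoms
    m_lower = (upper + (lower == upper).float() - b) * probs
    m_upper = (b - lower) * probs
    projected = torch.zeros_like(probs)
    for i in range(probs.size(0)):
        projected[i].index_add_(0, lower[i].long(), m_lower[i])
        projected[i].index_add_(0, upper[i].long(), m_upper[i])
    return projected


if __name__ == "__main__":
    atoms = torch.linspace(-1.0, 1.0, 51)
    probs = torch.softmax(torch.randn(4, 51), dim=-1)
    rewards = torch.rand(4)
    out = project_distribution(rewards, probs, atoms, gamma_n=0.99 ** 5,
                               v_min=-1.0, v_max=1.0)
    print(out.sum(dim=-1))              # each row should still sum to ~1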
def subscribe(tenant, actor_id, worker_id, api_server, client_id, client_secret, access_token, refresh_token, worker_ch): """ Main loop for the Actor executor worker. Subscribes to the actor's inbox and executes actor containers when message arrive. Also subscribes to the worker channel for future communications. :return: """ logger.debug("Top of subscribe().") actor_ch = ActorMsgChannel(actor_id) try: leave_containers = Config.get('workers', 'leave_containers') except configparser.NoOptionError: logger.info("No leave_containers value configured.") leave_containers = False if hasattr(leave_containers, 'lower'): leave_containers = leave_containers.lower() == "true" logger.info("leave_containers: {}".format(leave_containers)) try: mem_limit = Config.get('workers', 'mem_limit') except configparser.NoOptionError: logger.info("No mem_limit value configured.") mem_limit = "-1" mem_limit = str(mem_limit) try: max_cpus = Config.get('workers', 'max_cpus') except configparser.NoOptionError: logger.info("No max_cpus value configured.") max_cpus = "-1" logger.info("max_cpus: {}".format(max_cpus)) ag = None if api_server and client_id and client_secret and access_token and refresh_token: logger.info("Creating agave client.") verify = get_tenant_verify(tenant) ag = Agave(api_server=api_server, token=access_token, refresh_token=refresh_token, api_key=client_id, api_secret=client_secret, verify=verify) else: logger.info("Not creating agave client.") logger.info("Starting the process worker channel thread.") t = threading.Thread(target=process_worker_ch, args=(tenant, worker_ch, actor_id, worker_id, actor_ch, ag)) t.start() logger.info("Worker subscribing to actor channel.") # keep track of whether we need to update the worker's status back to READY; otherwise, we # will hit redis with an UPDATE every time the subscription loop times out (i.e., every 2s) update_worker_status = True # shared global tracking whether this worker should keep running; shared between this thread and # the "worker channel processing" thread. global keep_running # main subscription loop -- processing messages from actor's mailbox while keep_running: if update_worker_status: Worker.update_worker_status(actor_id, worker_id, READY) update_worker_status = False try: msg, msg_obj = actor_ch.get_one() except channelpy.ChannelClosedException: logger.info("Channel closed, worker exiting...") keep_running = False sys.exit() logger.info("worker {} processing new msg.".format(worker_id)) try: Worker.update_worker_status(actor_id, worker_id, BUSY) except Exception as e: logger.error("unexpected exception from call to update_worker_status. Nacking message." "actor_id: {}; worker_id: {}; status: {}; exception: {}".format(actor_id, worker_id, BUSY, e)) msg_obj.nack(requeue=True) raise e update_worker_status = True logger.info("Received message {}. Starting actor container...".format(msg)) # the msg object is a dictionary with an entry called message and an arbitrary # set of k:v pairs coming in from the query parameters. message = msg.pop('message', '') try: actor = Actor.from_db(actors_store[actor_id]) execution_id = msg['_abaco_execution_id'] content_type = msg['_abaco_Content_Type'] mounts = actor.mounts logger.debug("actor mounts: {}".format(mounts)) except Exception as e: logger.error("unexpected exception retrieving actor, execution, content-type, mounts. Nacking message." 
"actor_id: {}; worker_id: {}; status: {}; exception: {}".format(actor_id, worker_id, BUSY, e)) msg_obj.nack(requeue=True) raise e # for results, create a socket in the configured directory. try: socket_host_path_dir = Config.get('workers', 'socket_host_path_dir') except (configparser.NoSectionError, configparser.NoOptionError) as e: logger.error("No socket_host_path configured. Cannot manage results data. Nacking message") Actor.set_status(actor_id, ERROR, msg="Abaco instance not configured for results data.") msg_obj.nack(requeue=True) raise e socket_host_path = '{}.sock'.format(os.path.join(socket_host_path_dir, worker_id, execution_id)) logger.info("Create socket at path: {}".format(socket_host_path)) # add the socket as a mount: mounts.append({'host_path': socket_host_path, 'container_path': '/_abaco_results.sock', 'format': 'ro'}) # for binary data, create a fifo in the configured directory. The configured # fifo_host_path_dir is equal to the fifo path in the worker container: fifo_host_path = None if content_type == 'application/octet-stream': try: fifo_host_path_dir = Config.get('workers', 'fifo_host_path_dir') except (configparser.NoSectionError, configparser.NoOptionError) as e: logger.error("No fifo_host_path configured. Cannot manage binary data.") Actor.set_status(actor_id, ERROR, msg="Abaco instance not configured for binary data. Nacking message.") msg_obj.nack(requeue=True) raise e fifo_host_path = os.path.join(fifo_host_path_dir, worker_id, execution_id) try: os.mkfifo(fifo_host_path) logger.info("Created fifo at path: {}".format(fifo_host_path)) except Exception as e: logger.error("Could not create fifo_path. Nacking message. Exception: {}".format(e)) msg_obj.nack(requeue=True) raise e # add the fifo as a mount: mounts.append({'host_path': fifo_host_path, 'container_path': '/_abaco_binary_data', 'format': 'ro'}) # the execution object was created by the controller, but we need to add the worker id to it now that we # know which worker will be working on the execution. logger.debug("Adding worker_id to execution.") try: Execution.add_worker_id(actor_id, execution_id, worker_id) except Exception as e: logger.error("Unexpected exception adding working_id to the Execution. Nacking message. Exception: {}".format(e)) msg_obj.nack(requeue=True) raise e # privileged dictates whether the actor container runs in privileged mode and if docker daemon is mounted. privileged = False if type(actor['privileged']) == bool and actor['privileged']: privileged = True logger.debug("privileged: {}".format(privileged)) # overlay resource limits if set on actor: if actor.mem_limit: mem_limit = actor.mem_limit if actor.max_cpus: max_cpus = actor.max_cpus # retrieve the default environment registered with the actor. 
environment = actor['default_environment'] logger.debug("Actor default environment: {}".format(environment)) # construct the user field from the actor's uid and gid: user = get_container_user(actor) logger.debug("Final user valiue: {}".format(user)) # overlay the default_environment registered for the actor with the msg # dictionary environment.update(msg) environment['_abaco_access_token'] = '' environment['_abaco_actor_dbid'] = actor_id environment['_abaco_actor_id'] = actor.id environment['_abaco_worker_id'] = worker_id environment['_abaco_container_repo'] = actor.image environment['_abaco_actor_state'] = actor.state environment['_abaco_actor_name'] = actor.name or 'None' logger.debug("Overlayed environment: {}".format(environment)) # if we have an agave client, get a fresh set of tokens: if ag: try: ag.token.refresh() token = ag.token.token_info['access_token'] environment['_abaco_access_token'] = token logger.info("Refreshed the tokens. Passed {} to the environment.".format(token)) except Exception as e: logger.error("Got an exception trying to get an access token. Stoping worker and nacking message. " "Exception: {}".format(e)) msg_obj.nack(requeue=True) raise e else: logger.info("Agave client `ag` is None -- not passing access token.") logger.info("Passing update environment: {}".format(environment)) try: stats, logs, final_state, exit_code, start_time = execute_actor(actor_id, worker_id, execution_id, image, message, user, environment, privileged, mounts, leave_containers, fifo_host_path, socket_host_path, mem_limit, max_cpus) except DockerStartContainerError as e: logger.error("Worker {} got DockerStartContainerError: {} trying to start actor for execution {}." "Placing message back on queue.".format(worker_id, e, execution_id)) # if we failed to start the actor container, we leave the worker up and re-queue the original # message; NOTE - we use the "low level" put() instead of put_message() because we have the # exact message we want to place in the queue; put_message is used by the controller to msg_obj.nack(requeue=True) logger.debug('message requeued.') continue except DockerStopContainerError as e: logger.error("Worker {} was not able to stop actor for execution: {}; Exception: {}. " "Putting the actor in error status and shutting down workers.".format(worker_id, execution_id, e)) Actor.set_status(actor_id, ERROR, "Error executing container: {}".format(e)) # since the error was with stopping the actor, we will consider this message "processed"; this choice # could be reconsidered/changed msg_obj.ack() shutdown_workers(actor_id, delete_actor_ch=False) # wait for worker to be shutdown.. time.sleep(600) break except Exception as e: logger.error("Worker {} got an unexpected exception trying to run actor for execution: {}." "Putting the actor in error status and shutting down workers. Exception: {}; type: {}".format(worker_id, execution_id, e, type(e))) Actor.set_status(actor_id, ERROR, "Error executing container: {}".format(e)) # the execute_actor function raises a DockerStartContainerError if it met an exception before starting the # actor container; if the container was started, then another exception should be raised. Therefore, # we can assume here that the container was at least started and we can ack the message. msg_obj.ack() shutdown_workers(actor_id, delete_actor_ch=False) # wait for worker to be shutdown.. time.sleep(600) break # ack the message msg_obj.ack() # Add the completed stats to the execution logger.info("Actor container finished successfully. 
Got stats object:{}".format(str(stats))) Execution.finalize_execution(actor_id, execution_id, COMPLETE, stats, final_state, exit_code, start_time) logger.info("Added execution: {}".format(execution_id)) # Add the logs to the execution Execution.set_logs(execution_id, logs) logger.info("Added execution logs.") # Update the worker's last updated and last execution fields: try: Worker.update_worker_execution_time(actor_id, worker_id) except KeyError: # it is possible that this worker was sent a gracful shutdown command in the other thread # and that spawner has already removed this worker from the store. logger.info("worker {} got unexpected key error trying to update its execution time. " "Worker better be shutting down! keep_running: {}".format(worker_id, keep_running)) if keep_running: logger.error("worker couldn't update's its execution time but keep_running is still true!") logger.info("worker time stamps updated.")
def main(): order_book_id_number = 10 toy_data = create_toy_data(order_book_ids_number=order_book_id_number, feature_number=20, start="2019-05-01", end="2019-12-12", frequency="D") env = PortfolioTradingGym(data_df=toy_data, sequence_window=5, add_cash=True) env = Numpy(env) env = ch.envs.Logger(env, interval=1000) env = ch.envs.Torch(env) env = ch.envs.Runner(env) # create net action_size = env.action_space.shape[0] number_asset, seq_window, features_number = env.observation_space.shape input_size = features_number actor = Actor(input_size=input_size, hidden_size=50, action_size=action_size) critic = Critic(input_size=input_size, hidden_size=50, action_size=action_size) target_actor = create_target_network(actor) target_critic = create_target_network(critic) actor_optimiser = optim.Adam(actor.parameters(), lr=LEARNING_RATE_ACTOR) critic_optimiser = optim.Adam(critic.parameters(), lr=LEARNING_RATE_CRITIC) replay = ch.ExperienceReplay() ou_noise = OrnsteinUhlenbeckNoise(mu=np.zeros(action_size)) def get_action(state): action = actor(state) action = action + ou_noise()[0] return action def get_random_action(state): action = torch.softmax(torch.randn(action_size), dim=0) return action for step in range(1, MAX_STEPS + 1): with torch.no_grad(): if step < UPDATE_START: replay += env.run(get_random_action, steps=1) else: replay += env.run(get_action, steps=1) replay = replay[-REPLAY_SIZE:] if step > UPDATE_START and step % UPDATE_INTERVAL == 0: sample = random.sample(replay, BATCH_SIZE) batch = ch.ExperienceReplay(sample) next_values = target_critic(batch.next_state(), target_actor(batch.next_state())).view( -1, 1) values = critic(batch.state(), batch.action()).view(-1, 1) rewards = ch.normalize(batch.reward()) #rewards = batch.reward()/100.0 change the convergency a lot value_loss = ch.algorithms.ddpg.state_value_loss( values, next_values.detach(), rewards, batch.done(), DISCOUNT) critic_optimiser.zero_grad() value_loss.backward() critic_optimiser.step() # Update policy by one step of gradient ascent policy_loss = -critic(batch.state(), actor(batch.state())).mean() actor_optimiser.zero_grad() policy_loss.backward() actor_optimiser.step() # Update target networks ch.models.polyak_average(target_critic, critic, POLYAK_FACTOR) ch.models.polyak_average(target_actor, actor, POLYAK_FACTOR)
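# Illustrative sketch: the critic update above delegates to
# ch.algorithms.ddpg.state_value_loss(); conceptually that is the MSE between the
# critic's Q(s, a) and the bootstrapped DDPG target
# y = r + gamma * (1 - done) * Q_target(s', mu_target(s')). The function below is a
# hand-written equivalent of that target computation (an assumption about what the
# library call computes, not its actual source).
import torch
import torch.nn.functional as F


def ddpg_value_loss(values, next_values, rewards, dones, discount):
    """values, next_values, rewards, dones: (B, 1) tensors."""
    targets = rewards + discount * (1.0 - dones) * next_values
    return F.mse_loss(values, targets.detach())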
def put(self, actor_id): logger.debug("top of PUT /actors/{}".format(actor_id)) dbid = Actor.get_dbid(g.tenant, actor_id) try: actor = Actor.from_db(actors_store[dbid]) except KeyError: logger.debug("did not find actor {} in store.".format(dbid)) raise ResourceError( "No actor found with id: {}.".format(actor_id), 404) previous_image = actor.image previous_status = actor.status previous_owner = actor.owner args = self.validate_put(actor) logger.debug("PUT args validated successfully.") args['tenant'] = g.tenant # user can force an update by setting the force param: update_image = args.get('force') if not update_image and args['image'] == previous_image: logger.debug("new image is the same and force was false. not updating actor.") logger.debug("Setting status to the actor's previous status which is: {}".format(previous_status)) args['status'] = previous_status else: update_image = True args['status'] = SUBMITTED logger.debug("new image is different. updating actor.") args['api_server'] = g.api_server # we do not allow a PUT to override the owner in case the PUT is issued by another user args['owner'] = previous_owner use_container_uid = args.get('use_container_uid') if Config.get('web', 'case') == 'camel': use_container_uid = args.get('useContainerUid') try: use_tas = Config.get('workers', 'use_tas_uid') except configparser.NoOptionError: logger.debug("no use_tas_uid config.") use_tas = False if hasattr(use_tas, 'lower'): use_tas = use_tas.lower() == 'true' else: logger.error("use_tas_uid configured but not as a string. use_tas_uid: {}".format(use_tas)) logger.debug("use_tas={}. user_container_uid={}".format(use_tas, use_container_uid)) if use_tas and not use_container_uid: uid, gid, tasdir = get_tas_data(g.user, g.tenant) if uid and gid: args['uid'] = uid args['gid'] = gid if tasdir: args['tasdir'] = tasdir args['mounts'] = get_all_mounts(args) args['last_update_time'] = get_current_utc_time() logger.debug("update args: {}".format(args)) actor = Actor(**args) actors_store[actor.db_id] = actor.to_db() logger.info("updated actor {} stored in db.".format(actor_id)) if update_image: worker_ids = [Worker.request_worker(tenant=g.tenant, actor_id=actor.db_id)] ch = CommandChannel() ch.put_cmd(actor_id=actor.db_id, worker_ids=worker_ids, image=actor.image, tenant=args['tenant']) ch.close() logger.debug("put new command on command channel to update actor.") # put could have been issued by a user with if not previous_owner == g.user: set_permission(g.user, actor.db_id, UPDATE) return ok(result=actor.display(), msg="Actor updated successfully.")
def run(seed, noise_type, layer_norm, **kwargs): """Configure things.""" rank = MPI.COMM_WORLD.Get_rank() if rank != 0: logger.set_level(logger.DISABLED) """Create Simulation envs.""" env = PegintoHoles() """Create True envs""" # env = Env_robot_control() """Parse noise_type""" action_noise = None param_noise = None nb_actions = env.action_dim for current_noise_type in noise_type.split(','): current_noise_type = current_noise_type.strip() if current_noise_type == 'none': pass elif 'adaptive-param' in current_noise_type: _, stddev = current_noise_type.split('_') action_noise = OrnsteinUhlenbeckActionNoise( mu=np.zeros(nb_actions), sigma=float(0.2) * np.ones(nb_actions)) param_noise = AdaptiveParamNoiseSpec( initial_stddev=float(stddev), desired_action_stddev=float(stddev)) elif 'normal' in current_noise_type: _, stddev = current_noise_type.split('_') action_noise = NormalActionNoise(mu=np.zeros(nb_actions), sigma=float(stddev) * np.ones(nb_actions)) elif 'ou' in current_noise_type: _, stddev = current_noise_type.split('_') action_noise = OrnsteinUhlenbeckActionNoise( mu=np.zeros(nb_actions), sigma=float(stddev) * np.ones(nb_actions)) else: raise RuntimeError( 'unknown noise type "{}"'.format(current_noise_type)) """Configure components.""" memory = Memory(limit=int(1e6), action_shape=env.action_dim, observation_shape=env.state_dim) critic = Critic(layer_norm=layer_norm) actor = Actor(nb_actions, layer_norm=layer_norm) """Seed everything to make things reproducible.""" seed = seed + 1000000 * rank logger.info('rank {}: seed={}, logdir={}'.format(rank, seed, logger.get_dir())) tf.reset_default_graph() set_global_seeds(seed) """Disable logging to avoid noise.""" start_time = time.time() """Train the model""" training.train(env=env, param_noise=param_noise, action_noise=action_noise, actor=actor, critic=critic, memory=memory, **kwargs) """Eval the result""" logger.info('total runtime: {}s'.format(time.time() - start_time))
def post(self, actor_id): def get_hypermedia(actor, exc): return {'_links': {'self': '{}/actors/v2/{}/executions/{}'.format(actor.api_server, actor.id, exc), 'owner': '{}/profiles/v2/{}'.format(actor.api_server, actor.owner), 'messages': '{}/actors/v2/{}/messages'.format(actor.api_server, actor.id)},} logger.debug("top of POST /actors/{}/messages.".format(actor_id)) dbid = Actor.get_dbid(g.tenant, actor_id) try: Actor.from_db(actors_store[dbid]) except KeyError: logger.debug("did not find actor: {}.".format(actor_id)) raise ResourceError("No actor found with id: {}.".format(actor_id), 404) args = self.validate_post() d = {} # build a dictionary of k:v pairs from the query parameters, and pass a single # additional object 'message' from within the post payload. Note that 'message' # need not be JSON data. logger.debug("POST body validated. actor: {}.".format(actor_id)) for k, v in request.args.items(): if k == 'message': continue d[k] = v logger.debug("extra fields added to message from query parameters: {}.".format(d)) if hasattr(g, 'user'): d['_abaco_username'] = g.user logger.debug("_abaco_username: {} added to message.".format(g.user)) if hasattr(g, 'api_server'): d['_abaco_api_server'] = g.api_server logger.debug("_abaco_api_server: {} added to message.".format(g.api_server)) # if hasattr(g, 'jwt'): # d['_abaco_jwt'] = g.jwt # if hasattr(g, 'jwt_server'): # d['_abaco_jwt_server'] = g.jwt_server if hasattr(g, 'jwt_header_name'): d['_abaco_jwt_header_name'] = g.jwt_header_name logger.debug("abaco_jwt_header_name: {} added to message.".format(g.jwt_header_name)) # create an execution exc = Execution.add_execution(dbid, {'cpu': 0, 'io': 0, 'runtime': 0, 'status': SUBMITTED, 'executor': g.user}) logger.info("Execution {} added for actor {}".format(exc, actor_id)) d['_abaco_execution_id'] = exc d['_abaco_Content_Type'] = args.get('_abaco_Content_Type', '') logger.debug("Final message dictionary: {}".format(d)) ch = ActorMsgChannel(actor_id=dbid) ch.put_msg(message=args['message'], d=d) ch.close() logger.debug("Message added to actor inbox. id: {}.".format(actor_id)) # make sure at least one worker is available actor = Actor.from_db(actors_store[dbid]) actor.ensure_one_worker() logger.debug("ensure_one_worker() called. id: {}.".format(actor_id)) if args.get('_abaco_Content_Type') == 'application/octet-stream': result = {'execution_id': exc, 'msg': 'binary - omitted'} else: result={'execution_id': exc, 'msg': args['message']} result.update(get_hypermedia(actor, exc)) case = Config.get('web', 'case') if not case == 'camel': return ok(result) else: return ok(dict_to_camel(result))
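# Illustrative sketch: from a client's point of view, the endpoint above is driven by
# POSTing a `message` (plus arbitrary query parameters) to the actor's messages URL,
# the same URL that appears in the hypermedia links. The snippet below is a
# hypothetical client call; the base URL and the Authorization header are
# deployment-specific placeholders, and the message is sent here as form data.
import requests

api_server = "https://example.tenant.org"          # placeholder
actor_id = "my-actor-id"                           # placeholder
headers = {"Authorization": "Bearer <token>"}      # auth mechanism depends on the deployment

rsp = requests.post(
    "{}/actors/v2/{}/messages".format(api_server, actor_id),
    data={"message": "hello actor"},
    params={"greeting": "true"},                   # extra query params end up in the actor's environment
    headers=headers,
)
print(rsp.json())                                  # result should include the new execution_id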
parser.add_argument('--demo-length',type=int, default=sys.maxsize, help='number of demo episodes to run') parser.add_argument('--distance', action='store_true', help='shows model with the distance version of per') parser.add_argument('--impact', action='store_true', help='shows model with the impact version of per') args = parser.parse_args() if args.distance: model_path = os.path.join("models/HandManipulateBlock-v0/distance/" "model.pt") elif args.impact: model_path = os.path.join("models/HandManipulateBlock-v0/impact/","model.pt") else: model_path = os.path.join("models/HandManipulateBlock-v0/normal/","model.pt") env = gym.make('HandManipulateBlock-v0') env_params = get_params(env) mean_obs, std_obs, mean_g, std_g, model = torch.load(model_path, map_location=lambda storage, loc: storage) agent = Actor(env_params, 256) agent.load_state_dict(model) for __ in range(args.demo_length): state = env.reset() state = Normalize(state, mean_obs, std_obs, mean_g, std_g) for _ in range (env._max_episode_steps): env.render() with torch.no_grad(): action = agent.forward(state) action = action.detach().numpy().squeeze() new_state, reward, _, info = env.step(action) new_state = Normalize(new_state, mean_obs, std_obs, mean_g, std_g) state = new_state
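# Illustrative sketch: Normalize() is used above but not shown. For the goal-based
# HandManipulateBlock environment the raw state is a dict with 'observation' and
# 'desired_goal' arrays, and a typical HER-style preprocessing step standardizes each
# with the stored means/stds, clips, and concatenates them into one tensor for the
# actor. The helper below is an assumption about that function, not the original.
import numpy as np
import torch


def normalize_state(state, mean_obs, std_obs, mean_g, std_g, clip_range=5.0):
    obs = np.clip((state['observation'] - mean_obs) / (std_obs + 1e-8),
                  -clip_range, clip_range)
    goal = np.clip((state['desired_goal'] - mean_g) / (std_g + 1e-8),
                   -clip_range, clip_range)
    # batch dimension of 1 so the actor can consume it directly
    return torch.tensor(np.concatenate([obs, goal]), dtype=torch.float32).unsqueeze(0)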
def check_metrics(self, actor_ids): for actor_id in actor_ids: logger.debug("TOP OF CHECK METRICS") query = { 'query': 'message_count_for_actor_{}'.format(actor_id.decode("utf-8").replace('-', '_')), 'time': datetime.datetime.utcnow().isoformat() + "Z" } r = requests.get(PROMETHEUS_URL + '/api/v1/query', params=query) data = json.loads(r.text)['data']['result'] change_rate = 0 try: previous_data = last_metric[actor_id] try: change_rate = int(data[0]['value'][1]) - int(previous_data[0]['value'][1]) except: logger.debug("Could not calculate change rate.") except: logger.info("No previous data yet for new actor {}".format(actor_id)) last_metric.update({actor_id: data}) # Add a worker if message count reaches a given number try: logger.debug("METRICS current message count: {}".format(data[0]['value'][1])) if int(data[0]['value'][1]) >= 1: tenant, aid = actor_id.decode('utf8').split('_') logger.debug('METRICS Attempting to create a new worker for {}'.format(actor_id)) try: # create a worker & add to this actor actor = Actor.from_db(actors_store[actor_id]) worker_ids = [Worker.request_worker(tenant=tenant, actor_id=aid)] logger.info("New worker id: {}".format(worker_ids[0])) ch = CommandChannel() ch.put_cmd(actor_id=actor.db_id, worker_ids=worker_ids, image=actor.image, tenant=tenant, num=1, stop_existing=False) ch.close() logger.debug('METRICS Added worker successfully for {}'.format(actor_id)) except Exception as e: logger.debug("METRICS - SOMETHING BROKE: {} - {} - {}".format(type(e), e, e.args)) elif int(data[0]['value'][1]) <= 1: logger.debug("METRICS made it to scale down block") # Check the number of workers for this actor before deciding to scale down workers = Worker.get_workers(actor_id) logger.debug('METRICS NUMBER OF WORKERS: {}'.format(len(workers))) try: if len(workers) == 1: logger.debug("METRICS only one worker, won't scale down") else: while len(workers) > 0: logger.debug('METRICS made it STATUS check') worker = workers.popitem()[1] logger.debug('METRICS SCALE DOWN current worker: {}'.format(worker['status'])) # check status of the worker is ready if worker['status'] == 'READY': logger.debug("METRICS I MADE IT") # scale down try: shutdown_worker(worker['id']) continue except Exception as e: logger.debug('METRICS ERROR shutting down worker: {} - {} - {}'.format(type(e), e, e.args)) logger.debug('METRICS shut down worker {}'.format(worker['id'])) except IndexError: logger.debug('METRICS only one worker found for actor {}. ' 'Will not scale down'.format(actor_id)) except Exception as e: logger.debug("METRICS SCALE UP FAILED: {}".format(e)) except Exception as e: logger.debug("METRICS - ANOTHER ERROR: {} - {} - {}".format(type(e), e, e.args))
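# Illustrative sketch: the indexing data[0]['value'][1] above relies on the shape of a
# Prometheus instant-query response, where each result item carries a
# [timestamp, "<value as string>"] pair. The example below shows that shape with a
# made-up payload and the same extraction, which is why the code above wraps the
# value in int(...).
import json

sample_response = json.loads("""
{
  "status": "success",
  "data": {
    "resultType": "vector",
    "result": [
      {"metric": {"__name__": "message_count_for_actor_demo"},
       "value": [1588888888.0, "3"]}
    ]
  }
}
""")

result = sample_response['data']['result']
if result:
    current_count = int(result[0]['value'][1])   # the value itself is returned as a string
    print(current_count)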
class Agent: def __init__(self, input_dims, alpha=0.001, beta=0.002, env=None, gamma=0.99, n_actions=2, max_size=1000000, tau=0.005, hd1=400, hd2=300, batch_size=64, noise=0.1): self.gamma = gamma self.tau = tau self.batch_size = batch_size self.n_actions = n_actions self.noise = noise self.memory = MemoryBuffer(max_size) self.max_action = env.action_space.high[0] self.min_action = env.action_space.low[0] self.actor = Actor(n_actions=n_actions) self.critic = Critic() self.target_actor = Actor(n_actions=n_actions) self.target_critic = Critic() self.actor.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=alpha)) self.critic.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=beta)) self.target_actor.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=alpha)) self.target_critic.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=alpha)) self.update_weights() def remember(self, state, action, reward, next_state, done): self.memory.add(state, action, reward, next_state, done) def train(self): cl, al = self.learn() if cl is not None: self.update_weights() return cl, al def update_weights(self, tau=None): if tau is None: tau = self.tau weights = [] targets = self.target_actor.weights for i, weight in enumerate(self.actor.weights): weights.append(weight * tau + targets[i] * (1 - tau)) self.target_actor.set_weights(weights) weights = [] targets = self.target_critic.weights for i, weight in enumerate(self.critic.weights): weights.append(weight * tau + targets[i] * (1 - tau)) self.target_critic.set_weights(weights) def choose_action(self, observation, evaluate=False): state = tf.convert_to_tensor([observation], dtype=tf.float32) actions = self.actor(state) if not evaluate: actions += tf.random.normal(shape=[self.n_actions], mean=0.0, stddev=self.noise) actions = tf.clip_by_value(actions, self.min_action, self.max_action) return actions[0] # @tf.function def learn(self): if len(self.memory) < self.batch_size: return None, None states, actions, rewards, next_states, done = self.memory.sample(self.batch_size) states = tf.convert_to_tensor(states, dtype=tf.float32) actions = tf.convert_to_tensor(actions, dtype=tf.float32) rewards = tf.convert_to_tensor(rewards, dtype=tf.float32) next_states = tf.convert_to_tensor(next_states, dtype=tf.float32) with tf.GradientTape() as tape: target_actions = self.target_actor(next_states) critic_value_ = tf.squeeze(self.target_critic(next_states, target_actions), 1) critic_value = tf.squeeze(self.critic(states, actions), 1) target = rewards + self.gamma * critic_value_ * (1 - done) critic_loss = tf.keras.losses.MSE(target, critic_value) critic_gradient = tape.gradient(critic_loss, self.critic.trainable_variables) self.critic.optimizer.apply_gradients( zip(critic_gradient, self.critic.trainable_variables) ) with tf.GradientTape() as tape: new_policy_actions = self.actor(states) actor_loss = -self.critic(states, new_policy_actions) actor_loss = tf.math.reduce_mean(actor_loss) actor_gradient = tape.gradient(actor_loss, self.actor.trainable_variables) self.actor.optimizer.apply_gradients( zip(actor_gradient, self.actor.trainable_variables) ) self.update_weights()
def run(env_id, seed, noise_type, layer_norm, evaluation, **kwargs): # Configure things. # Create envs. env = gym.make(env_id) env = bench.Monitor(env, os.path.join(logger.get_dir(), '0')) if evaluation: eval_env = gym.make(env_id) eval_env = bench.Monitor(eval_env, os.path.join(logger.get_dir(), 'gym_eval')) #env = bench.Monitor(env, None) else: eval_env = None # Parse noise_type action_noise = None param_noise = None nb_actions = env.action_space.shape[-1] for current_noise_type in noise_type.split(','): current_noise_type = current_noise_type.strip() if current_noise_type == 'none': pass elif 'adaptive-param' in current_noise_type: _, stddev = current_noise_type.split('_') param_noise = AdaptiveParamNoiseSpec( initial_stddev=float(stddev), desired_action_stddev=float(stddev)) elif 'normal' in current_noise_type: _, stddev = current_noise_type.split('_') action_noise = NormalActionNoise(mu=np.zeros(nb_actions), sigma=float(stddev) * np.ones(nb_actions)) elif 'ou' in current_noise_type: _, stddev = current_noise_type.split('_') action_noise = OrnsteinUhlenbeckActionNoise( mu=np.zeros(nb_actions), sigma=float(stddev) * np.ones(nb_actions)) else: raise RuntimeError( 'unknown noise type "{}"'.format(current_noise_type)) # Configure components. memory = Memory(limit=int(1e6), action_shape=env.action_space.shape, observation_shape=env.observation_space.shape) critic = Critic(layer_norm=layer_norm) actor = Actor(nb_actions, layer_norm=layer_norm) # Seed everything to make things reproducible. seed = seed logger.info('seed={}, logdir={}'.format(seed, logger.get_dir())) tf.reset_default_graph() set_global_seeds(seed) env.seed(seed) if eval_env is not None: eval_env.seed(seed) start_time = time.time() training.train(env=env, eval_env=eval_env, param_noise=param_noise, action_noise=action_noise, actor=actor, critic=critic, memory=memory, **kwargs) env.close() if eval_env is not None: eval_env.close() logger.info('total runtime: {}s'.format(time.time() - start_time))
from collections import deque import random import torch from torch import optim from tqdm import tqdm from hyperparams import ACTION_NOISE, DISCOUNT, HIDDEN_SIZE, LEARNING_RATE, MAX_STEPS, POLICY_DELAY, POLYAK_FACTOR, REPLAY_SIZE, TARGET_ACTION_NOISE, TARGET_ACTION_NOISE_CLIP, UPDATE_INTERVAL, UPDATE_START from hyperparams import OFF_POLICY_BATCH_SIZE as BATCH_SIZE from env import Env from models import Actor, Critic, create_target_network, update_target_network from utils import plot env = Env() actor = Actor(HIDDEN_SIZE) critic_1 = Critic(HIDDEN_SIZE, state_action=True) critic_2 = Critic(HIDDEN_SIZE, state_action=True) target_actor = create_target_network(actor) target_critic_1 = create_target_network(critic_1) target_critic_2 = create_target_network(critic_2) actor_optimiser = optim.Adam(actor.parameters(), lr=LEARNING_RATE) critics_optimiser = optim.Adam(list(critic_1.parameters()) + list(critic_2.parameters()), lr=LEARNING_RATE) D = deque(maxlen=REPLAY_SIZE) state, done, total_reward = env.reset(), False, 0 pbar = tqdm(range(1, MAX_STEPS + 1), unit_scale=1, smoothing=0) for step in pbar: with torch.no_grad(): if step < UPDATE_START: # To improve exploration take actions sampled from a uniform random distribution over actions at the start of training action = torch.tensor([[2 * random.random() - 1]])
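# Illustrative sketch: the script above stops right after the initial uniform
# exploration phase. The update that typically follows (and that the imported
# hyperparameters point to) is the TD3 step: target policy smoothing, a clipped
# double-Q target, delayed actor updates and polyak-averaged targets. The function
# below sketches that step against the objects defined above, under the assumptions
# that the critics take (state, action) pairs and that
# update_target_network(target, source, polyak) performs the polyak average.
import torch
import torch.nn.functional as F


def td3_update(batch, step):
    states, actions, rewards, next_states, dones = batch   # batched tensors
    with torch.no_grad():
        # target policy smoothing: add clipped noise to the target action
        noise = (torch.randn_like(actions) * TARGET_ACTION_NOISE).clamp(
            -TARGET_ACTION_NOISE_CLIP, TARGET_ACTION_NOISE_CLIP)
        next_actions = (target_actor(next_states) + noise).clamp(-1.0, 1.0)
        # clipped double-Q: bootstrap from the smaller of the two target critics
        target_q = torch.min(target_critic_1(next_states, next_actions),
                             target_critic_2(next_states, next_actions))
        y = rewards + DISCOUNT * (1 - dones) * target_q
    critic_loss = F.mse_loss(critic_1(states, actions), y) + \
                  F.mse_loss(critic_2(states, actions), y)
    critics_optimiser.zero_grad()
    critic_loss.backward()
    critics_optimiser.step()
    if step % POLICY_DELAY == 0:
        # delayed policy update followed by polyak-averaged target updates
        actor_loss = -critic_1(states, actor(states)).mean()
        actor_optimiser.zero_grad()
        actor_loss.backward()
        actor_optimiser.step()
        update_target_network(target_actor, actor, POLYAK_FACTOR)
        update_target_network(target_critic_1, critic_1, POLYAK_FACTOR)
        update_target_network(target_critic_2, critic_2, POLYAK_FACTOR)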
def main(worker_id, image): """ Main function for the worker process. This function pulls the actor image, performs the initial handshake with the spawner over the SpawnerWorkerChannel, and then subscribes to the actor's message channel. """ logger.info("Entering main() for worker: {}, image: {}".format( worker_id, image)) spawner_worker_ch = SpawnerWorkerChannel(worker_id=worker_id) # first, attempt to pull image from docker hub: try: logger.info("Worker pulling image {}...".format(image)) pull_image(image) except DockerError as e: # return a message to the spawner that there was an error pulling image and abort # this is not necessarily an error state: the user simply could have provided an # image name that does not exist in the registry. This is the first time we would # find that out. logger.info( "worker got a DockerError trying to pull image. Error: {}.".format( e)) spawner_worker_ch.put({'status': 'error', 'msg': str(e)}) raise e logger.info("Image {} pulled successfully.".format(image)) # inform the spawner that the image was pulled successfully and, simultaneously, # wait to receive message from spawner that it is time to subscribe to the actor channel logger.debug("Worker waiting on message from spawner...") result = spawner_worker_ch.put_sync({'status': 'ok'}) logger.info( "Worker received reply from spawner. result: {}.".format(result)) # should be OK to close the spawner_worker_ch on the worker side since the spawner was the first client # to open it. spawner_worker_ch.close() if result['status'] == 'error': # we do not expect to get an error response at this point. this needs investigation logger.error( "Worker received error message from spawner: {}. Quitting...". format(str(result))) raise WorkerException(str(result)) actor_id = result.get('actor_id') tenant = result.get('tenant') logger.info( "Worker received ok from spawner. Message: {}, actor_id:{}".format( result, actor_id)) api_server = None client_id = None client_secret = None access_token = None refresh_token = None if result.get('client') == 'yes': logger.info("Got client: yes, result: {}".format(result)) api_server = result.get('api_server') client_id = result.get('client_id') client_secret = result.get('client_secret') access_token = result.get('access_token') refresh_token = result.get('refresh_token') else: logger.info("Did not get client:yes, got result:{}".format(result)) try: Actor.set_status(actor_id, READY, status_message=" ") except KeyError: # it is possible the actor was already deleted during worker start up; if # so, the worker should have a stop message waiting for it. starting subscribe # as usual should allow this process to work as expected. pass logger.info("Actor status set to READY. subscribing to inbox.") worker_ch = WorkerChannel(worker_id=worker_id) subscribe(tenant, actor_id, worker_id, api_server, client_id, client_secret, access_token, refresh_token, worker_ch)
def __init__(self, state_dim, action_dim, max_action, memory, args): # misc self.criterion = nn.MSELoss() self.state_dim = state_dim self.action_dim = action_dim self.max_action = max_action self.memory = memory self.n = args.n_actor # actor self.actors = [ Actor(state_dim, action_dim, max_action, layer_norm=args.layer_norm) for i in range(self.n) ] self.actors_target = [ Actor(state_dim, action_dim, max_action, layer_norm=args.layer_norm) for i in range(self.n) ] self.actors_optimizer = [ torch.optim.Adam(self.actors[i].parameters(), lr=args.actor_lr) for i in range(self.n) ] for i in range(self.n): self.actors_target[i].load_state_dict(self.actors[i].state_dict()) # critic self.critic = CriticTD3(state_dim, action_dim, layer_norm=args.layer_norm) self.critic_target = CriticTD3(state_dim, action_dim, layer_norm=args.layer_norm) self.critic_target.load_state_dict(self.critic.state_dict()) self.critic_optimizer = torch.optim.Adam(self.critic.parameters(), lr=args.critic_lr) # cuda if torch.cuda.is_available(): for i in range(self.n): self.actors[i] = self.actors[i].cuda() self.actors_target[i] = self.actors_target[i].cuda() self.critic = self.critic.cuda() self.critic_target = self.critic_target.cuda() # shared memory for i in range(self.n): self.actors[i].share_memory() self.actors_target[i].share_memory() self.critic.share_memory() self.critic_target.share_memory() # hyper-parameters self.tau = args.tau self.discount = args.discount self.batch_size = args.batch_size self.policy_noise = args.policy_noise self.noise_clip = args.noise_clip self.policy_freq = args.policy_freq
class DDPGAgent: def __init__(self, env, gamma, tau, buffer_maxlen, batch_size, critic_learning_rate, actor_learning_rate, update_per_step, seed): self.device = torch.device( "cuda" if torch.cuda.is_available() else "cpu") # hyperparameters self.num_replay_updates_per_step = update_per_step self.batch_size = batch_size self.gamma = gamma self.tau = tau # initialize actor and critic networks self.critic = Critic(env.observation_space.shape[0], env.action_space.shape[0], seed).to(self.device) self.critic_target = Critic(env.observation_space.shape[0], env.action_space.shape[0], seed).to(self.device) self.actor = Actor(env.observation_space.shape[0], env.action_space.shape[0], seed).to(self.device) self.actor_target = Actor(env.observation_space.shape[0], env.action_space.shape[0], seed).to(self.device) # optimizers self.critic_optimizer = optim.Adam(self.critic.parameters(), lr=critic_learning_rate) self.actor_optimizer = optim.Adam(self.actor.parameters(), lr=actor_learning_rate) self.buffer = ReplayBuffer(buffer_maxlen, batch_size, seed) self.noise = OUNoise(env.action_space.shape[0]) def get_action(self, state): state = torch.FloatTensor(state).to(self.device) self.actor.eval() with torch.no_grad(): action = self.actor(state) self.actor.train() action = action.cpu().numpy() return action def step(self, state, action, reward, next_state, done): # Save experience in replay buffer self.buffer.add(state, action, reward, next_state, done) q_loss, policy_loss = None, None # If enough samples are available in buffer, get random subset and learn if len(self.buffer) >= self.batch_size: # update the network "num_replay_updates_per_step" times in each step for _ in range(self.num_replay_updates_per_step): experiences = self.buffer.sample() q_loss, policy_loss = self.learn(experiences) q_loss = q_loss.detach().item() policy_loss = policy_loss.detach().item() return q_loss, policy_loss def learn(self, experiences): """Updating actor and critic parameters based on sampled experiences from replay buffer.""" states, actions, rewards, next_states, dones = experiences curr_Q = self.critic(states, actions) next_actions = self.actor_target(next_states).detach() next_Q = self.critic_target(next_states, next_actions).detach() target_Q = rewards + self.gamma * next_Q * (1 - dones) # losses q_loss = F.mse_loss(curr_Q, target_Q) policy_loss = -self.critic(states, self.actor(states)).mean() # update actor self.actor_optimizer.zero_grad() policy_loss.backward() self.actor_optimizer.step() # update critic self.critic_optimizer.zero_grad() q_loss.backward() self.critic_optimizer.step() # update target networks for target_param, param in zip(self.actor_target.parameters(), self.actor.parameters()): target_param.data.copy_(param.data * self.tau + target_param.data * (1.0 - self.tau)) for target_param, param in zip(self.critic_target.parameters(), self.critic.parameters()): target_param.data.copy_(param.data * self.tau + target_param.data * (1.0 - self.tau)) return q_loss, policy_loss
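# Illustrative sketch: OUNoise is instantiated above but its implementation is not
# shown in this snippet. A common Ornstein-Uhlenbeck exploration-noise implementation
# for DDPG looks like the class below; the theta/sigma values are conventional
# defaults, not values taken from the original file, and the class is named
# differently to avoid implying it is the project's own.
import numpy as np


class OUNoiseSketch:
    def __init__(self, action_size, mu=0.0, theta=0.15, sigma=0.2, seed=0):
        self.mu = mu * np.ones(action_size)
        self.theta = theta
        self.sigma = sigma
        self.rng = np.random.default_rng(seed)
        self.reset()

    def reset(self):
        # start each episode from the long-run mean
        self.state = self.mu.copy()

    def sample(self):
        # dx = theta * (mu - x) + sigma * N(0, 1): mean-reverting, temporally correlated noise
        dx = self.theta * (self.mu - self.state) + \
             self.sigma * self.rng.standard_normal(len(self.mu))
        self.state = self.state + dx
        return self.state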
def subscribe(tenant, actor_id, worker_id, api_server, client_id, client_secret, access_token, refresh_token, worker_ch): """ Main loop for the Actor executor worker. Subscribes to the actor's inbox and executes actor containers when message arrive. Also subscribes to the worker channel for future communications. :return: """ logger.debug("Top of subscribe().") actor_ch = ActorMsgChannel(actor_id) try: leave_containers = Config.get('workers', 'leave_containers') except configparser.NoOptionError: leave_containers = False ag = None if api_server and client_id and client_secret and access_token and refresh_token: logger.info("Creating agave client.") verify = get_tenant_verify(tenant) ag = Agave(api_server=api_server, token=access_token, refresh_token=refresh_token, api_key=client_id, api_secret=client_secret, verify=verify) else: logger.info("Not creating agave client.") logger.info("Starting the process worker channel thread.") t = threading.Thread(target=process_worker_ch, args=(tenant, worker_ch, actor_id, worker_id, actor_ch, ag)) t.start() logger.info("Worker subscribing to actor channel.") update_worker_status = True global keep_running while keep_running: if update_worker_status: Worker.update_worker_status(actor_id, worker_id, READY) update_worker_status = False try: msg = actor_ch.get(timeout=2) except channelpy.ChannelTimeoutException: continue except channelpy.ChannelClosedException: logger.info("Channel closed, worker exiting...") keep_running = False sys.exit() update_worker_status = True logger.info( "Received message {}. Starting actor container...".format(msg)) # the msg object is a dictionary with an entry called message and an arbitrary # set of k:v pairs coming in from the query parameters. message = msg.pop('message', '') actor = Actor.from_db(actors_store[actor_id]) execution_id = msg['_abaco_execution_id'] content_type = msg['_abaco_Content_Type'] mounts = actor.mounts logger.debug("actor mounts: {}".format(mounts)) # for binary data, create a fifo in the configured directory. The configured # fifo_host_path_dir is equal to the fifo path in the worker container: fifo_host_path = None if content_type == 'application/octet-stream': try: fifo_host_path_dir = Config.get('workers', 'fifo_host_path_dir') except (configparser.NoSectionError, configparser.NoOptionError): logger.error( "No fifo_host_path configured. Cannot manage binary data.") Actor.set_status( actor_id, ERROR, msg="Abaco instance not configured for binary data.") continue fifo_host_path = os.path.join(fifo_host_path_dir, worker_id, execution_id) logger.info("Create fifo at path: {}".format(fifo_host_path)) try: os.mkfifo(fifo_host_path) except Exception as e: logger.error( "Could not create fifo_path. Exception: {}".format(e)) raise e # add the fifo as a mount: mounts.append({ 'host_path': fifo_host_path, 'container_path': '/_abaco_binary_data', 'format': 'ro' }) # the execution object was created by the controller, but we need to add the worker id to it now that we # know which worker will be working on the execution. logger.debug("Adding worker_id to execution.") Execution.add_worker_id(actor_id, execution_id, worker_id) # privileged dictates whether the actor container runs in privileged mode and if docker daemon is mounted. privileged = False if type(actor['privileged']) == bool and actor['privileged']: privileged = True logger.debug("privileged: {}".format(privileged)) # retrieve the default environment registered with the actor. 
environment = actor['default_environment'] logger.debug("Actor default environment: {}".format(environment)) # construct the user field from the actor's uid and gid: user = get_container_user(actor) logger.debug("Final user valiue: {}".format(user)) # overlay the default_environment registered for the actor with the msg # dictionary environment.update(msg) environment['_abaco_access_token'] = '' environment['_abaco_actor_dbid'] = actor_id environment['_abaco_actor_id'] = actor.id environment['_abaco_actor_state'] = actor.state logger.debug("Overlayed environment: {}".format(environment)) # if we have an agave client, get a fresh set of tokens: if ag: try: ag.token.refresh() token = ag.token.token_info['access_token'] environment['_abaco_access_token'] = token logger.info( "Refreshed the tokens. Passed {} to the environment.". format(token)) except Exception as e: logger.error( "Got an exception trying to get an access token: {}". format(e)) else: logger.info( "Agave client `ag` is None -- not passing access token.") logger.info("Passing update environment: {}".format(environment)) try: stats, logs, final_state, exit_code, start_time = execute_actor( actor_id, worker_id, worker_ch, image, message, user, environment, privileged, mounts, leave_containers, fifo_host_path) except DockerStartContainerError as e: logger.error("Got DockerStartContainerError: {}".format(e)) Actor.set_status(actor_id, ERROR, "Error executing container: {}".format(e)) continue # Add the completed stats to the execution logger.info( "Actor container finished successfully. Got stats object:{}". format(str(stats))) Execution.finalize_execution(actor_id, execution_id, COMPLETE, stats, final_state, exit_code, start_time) logger.info("Added execution: {}".format(execution_id)) # Add the logs to the execution Execution.set_logs(execution_id, logs) logger.info("Added execution logs.") # Update the worker's last updated and last execution fields: Worker.update_worker_execution_time(actor_id, worker_id) logger.info("worker time stamps updated.")
class DDPG():
    """Interacts with and learns from the environment."""

    def __init__(self, state_size, action_size, random_seed, hyper, num_agents, memory):
        self.action_size = action_size
        self.num_agents = num_agents
        # keep a reference so step()/learn() do not rely on a module-level `hyper` global
        self.hyper = hyper
        # Actor Network (w/ Target Network)
        self.actor_local = Actor(state_size, action_size, random_seed).to(device)
        self.actor_target = Actor(state_size, action_size, random_seed).to(device)
        self.actor_optimizer = optim.Adam(self.actor_local.parameters(), lr=hyper['LR_ACTOR'])
        # Critic Network (w/ Target Network)
        self.critic_local = Critic(state_size, action_size, num_agents, random_seed).to(device)
        self.critic_target = Critic(state_size, action_size, num_agents, random_seed).to(device)
        self.critic_optimizer = optim.Adam(self.critic_local.parameters(),
                                           lr=hyper['LR_CRITIC'])  # , weight_decay=hyper['WEIGHT_DECAY'])
        # Noise process
        self.noise = OUNoise(action_size, random_seed)
        self.t = 0
        self.memory = memory

    def step(self, state, action, reward, next_state, done,
             others_states, others_actions, others_next_states):
        self.memory.add(state, action, reward, next_state, done,
                        others_states, others_actions, others_next_states)
        self.t = (self.t + 1) % self.hyper['UPDATE_EVERY']
        if self.t == 0:
            if len(self.memory) > self.hyper['BATCH_SIZE']:
                experiences = self.memory.sample()
                self.learn(experiences, self.hyper['GAMMA'])

    def act(self, states, add_noise=True):
        states = torch.from_numpy(states).float().to(device)
        self.actor_local.eval()
        with torch.no_grad():
            actions = self.actor_local(states).cpu().data.numpy()
        self.actor_local.train()
        if add_noise:
            actions += self.noise.sample()
        return np.clip(actions, -1, 1)

    def reset(self):
        self.noise.reset()

    def learn(self, experiences, gamma):
        (states, actions, rewards, next_states, dones,
         others_states, others_actions, others_next_states) = experiences
        rewards_ = rewards
        all_states = torch.cat((states, others_states), dim=1).to(device)
        all_actions = torch.cat((actions, others_actions), dim=1).to(device)
        all_next_states = torch.cat((next_states, others_next_states), dim=1).to(device)

        # --------------------------- update critic ---------------------------
        l_all_next_actions = []
        l_all_next_actions.append(self.actor_target(states))
        l_all_next_actions.append(self.actor_target(others_states))
        all_next_actions = torch.cat(l_all_next_actions, dim=1).to(device)
        Q_targets_next = self.critic_target(all_next_states, all_next_actions)
        Q_targets = rewards_ + (gamma * Q_targets_next * (1 - dones))
        Q_expected = self.critic_local(all_states, all_actions)
        critic_loss = F.mse_loss(Q_expected, Q_targets)
        # Minimize the loss
        self.critic_optimizer.zero_grad()
        critic_loss.backward()
        # torch.nn.utils.clip_grad_norm_(self.critic_local.parameters(), 1)
        self.critic_optimizer.step()

        # --------------------------- update actor ---------------------------
        this_actions_pred = self.actor_local(states)
        others_actions_pred = self.actor_local(others_states)
        others_actions_pred = others_actions_pred.detach()
        actions_pred = torch.cat((this_actions_pred, others_actions_pred), dim=1).to(device)
        actor_loss = -self.critic_local(all_states, actions_pred).mean()
        # Minimize the loss
        self.actor_optimizer.zero_grad()
        actor_loss.backward()
        self.actor_optimizer.step()

        # ---------------------- update target networks ----------------------
        self.soft_update(self.critic_local, self.critic_target, self.hyper['TAU'])
        self.soft_update(self.actor_local, self.actor_target, self.hyper['TAU'])

    def soft_update(self, local_model, target_model, tau):
        iter_params = zip(target_model.parameters(), local_model.parameters())
        for target_param, local_param in iter_params:
            tensor_aux = tau * local_param.data + (1.0 - tau) * target_param.data
            target_param.data.copy_(tensor_aux)
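# The soft_update method above is the standard DDPG Polyak averaging of target parameters:
# theta_target <- tau * theta_local + (1 - tau) * theta_target. The self-contained sketch
# below shows the same update on two plain nn.Linear layers, outside the class; the layer
# sizes and tau value are illustrative assumptions only.
import torch
import torch.nn as nn


def polyak_update(local_model, target_model, tau):
    # Move each target parameter a fraction tau of the way toward its local counterpart.
    for target_param, local_param in zip(target_model.parameters(), local_model.parameters()):
        target_param.data.copy_(tau * local_param.data + (1.0 - tau) * target_param.data)


local, target = nn.Linear(4, 2), nn.Linear(4, 2)
target.load_state_dict(local.state_dict())  # hard copy, as is typical at initialization
polyak_update(local, target, tau=1e-3)      # target moves 0.1% of the way toward local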
class TestCapstone(unittest.TestCase):

    def setUp(self):
        self.app = APP
        self.client = self.app.test_client
        database_name = "capstone_test"
        database_username = "******"
        database_password = "******"
        self.database_path = "postgresql://{}:{}@{}/{}".format(
            database_username, database_password, 'localhost:5432', database_name)
        setup_db(self.app, self.database_path)
        with self.app.app_context():
            db.drop_all()
            db.create_all()
        self.executive_token = os.getenv("EXECUTIVE_TOKEN")
        self.director_token = os.getenv("DIRECTOR_TOKEN")
        self.assistant_token = os.getenv("ASSISTANT_TOKEN")
        self.existing_actor = Actor(name="Brad", age=45, gender="M")
        self.existing_actor.create()
        self.existing_movie = Movie(
            title="Once Upon", release_date="2019-10-04 19:09:33.77486")
        self.existing_movie.create()

    def tearDown(self):
        with self.app.app_context():
            db.session.rollback()
            db.session.close()

    def test_precreated_actor_exists(self):
        actor = Actor.query.filter_by(name="Brad", age=45, gender="M").first()
        self.assertIsNotNone(actor)

    def test_precreated_movie_exists(self):
        movie = Movie.query.filter_by(
            title="Once Upon", release_date="2019-10-04 19:09:33.77486").first()
        self.assertIsNotNone(movie)

    def test_assistant_should_get_all_actors(self):
        actor = Actor(name="Abls", age=123, gender="M")
        actor.create()
        res = self.client().get(
            '/actors',
            headers={"Authorization": "Bearer {}".format(self.assistant_token)})
        data = json.loads(res.data)
        self.assertEqual(res.status_code, 200)
        self.assertTrue(data['success'])
        actors = Actor.query.all()
        self.assertEqual(len(data['actors']), len(actors))

    def test_director_should_get_all_actors(self):
        actor = Actor(name="Abls", age=123, gender="M")
        actor.create()
        res = self.client().get(
            '/actors',
            headers={"Authorization": "Bearer {}".format(self.director_token)})
        data = json.loads(res.data)
        self.assertEqual(res.status_code, 200)
        self.assertTrue(data['success'])
        actors = Actor.query.all()
        self.assertEqual(len(data['actors']), len(actors))

    def test_executive_should_get_all_actors(self):
        actor = Actor(name="Abls", age=123, gender="M")
        actor.create()
        res = self.client().get(
            '/actors',
            headers={"Authorization": "Bearer {}".format(self.executive_token)})
        data = json.loads(res.data)
        self.assertEqual(res.status_code, 200)
        self.assertTrue(data['success'])
        actors = Actor.query.all()
        self.assertEqual(len(data['actors']), len(actors))

    def test_assistant_should_get_all_movies(self):
        movie = Movie(title="Test Title", release_date="2012-04-23 18:25:43.511")
        movie.create()
        res = self.client().get(
            '/movies',
            headers={"Authorization": "Bearer {}".format(self.assistant_token)})
        data = json.loads(res.data)
        self.assertEqual(res.status_code, 200)
        self.assertTrue(data['success'])
        movies = Movie.query.all()
        self.assertEqual(len(data['movies']), len(movies))

    def test_director_should_get_all_movies(self):
        movie = Movie(title="Test Title", release_date="2012-04-23 18:25:43.511")
        movie.create()
        res = self.client().get(
            '/movies',
            headers={"Authorization": "Bearer {}".format(self.director_token)})
        data = json.loads(res.data)
        self.assertEqual(res.status_code, 200)
        self.assertTrue(data['success'])
        movies = Movie.query.all()
        self.assertEqual(len(data['movies']), len(movies))

    def test_executive_should_get_all_movies(self):
        movie = Movie(title="Test Title", release_date="2012-04-23 18:25:43.511")
        movie.create()
        res = self.client().get(
            '/movies',
            headers={"Authorization": "Bearer {}".format(self.executive_token)})
        data = json.loads(res.data)
        self.assertEqual(res.status_code, 200)
        self.assertTrue(data['success'])
        movies = Movie.query.all()
        self.assertEqual(len(data['movies']), len(movies))

    def test_assistant_cant_create_actor(self):
        res = self.client().post(
            '/actors',
            headers={"Authorization": "Bearer {}".format(self.assistant_token)},
            json=sample_actor)
        data = json.loads(res.data)
        self.assertEqual(res.status_code, 401)
        self.assertFalse(data['success'])

    def test_director_should_create_actor(self):
        res = self.client().post(
            '/actors',
            headers={"Authorization": "Bearer {}".format(self.director_token)},
            json=sample_actor)
        data = json.loads(res.data)
        self.assertEqual(res.status_code, 201)
        self.assertTrue(data['success'])

    def test_executive_should_create_actor(self):
        res = self.client().post(
            '/actors',
            headers={"Authorization": "Bearer {}".format(self.executive_token)},
            json=sample_actor)
        data = json.loads(res.data)
        self.assertEqual(res.status_code, 201)
        self.assertTrue(data['success'])
        createdId = data['created']
        actor = Actor.query.get(createdId)
        self.assertIsNotNone(actor)
        self.assertEqual(actor.id, createdId)

    def test_incorrect_create_actor(self):
        res = self.client().post(
            '/actors',
            headers={"Authorization": "Bearer {}".format(self.executive_token)})
        self.assertEqual(res.status_code, 400)
        data = json.loads(res.data)
        self.assertFalse(data['success'])

    def test_assistant_cant_create_movie(self):
        res = self.client().post(
            '/movies',
            headers={"Authorization": "Bearer {}".format(self.assistant_token)},
            json=sample_movie)
        data = json.loads(res.data)
        self.assertEqual(res.status_code, 401)
        self.assertFalse(data['success'])

    def test_director_cant_create_movie(self):
        res = self.client().post(
            '/movies',
            headers={"Authorization": "Bearer {}".format(self.director_token)},
            json=sample_movie)
        data = json.loads(res.data)
        self.assertEqual(res.status_code, 401)
        self.assertFalse(data['success'])

    def test_executive_should_create_movie(self):
        res = self.client().post(
            '/movies',
            headers={"Authorization": "Bearer {}".format(self.executive_token)},
            json=sample_movie)
        data = json.loads(res.data)
        self.assertEqual(res.status_code, 201)
        self.assertTrue(data['success'])
        createdId = data['created']
        movie = Movie.query.get(createdId)
        self.assertIsNotNone(movie)
        self.assertEqual(movie.id, createdId)

    def test_incorrect_create_movie(self):
        res = self.client().post(
            '/movies',
            headers={"Authorization": "Bearer {}".format(self.executive_token)})
        self.assertEqual(res.status_code, 400)
        data = json.loads(res.data)
        self.assertFalse(data['success'])

    def test_assistant_cant_patch_actor(self):
        actor = Actor.query.filter_by(name="Brad", age=45, gender="M").first()
        res = self.client().patch(
            '/actors',
            headers={"Authorization": "Bearer {}".format(self.assistant_token)},
            json=dict(id=actor.id))
        data = json.loads(res.data)
        self.assertEqual(res.status_code, 401)
        self.assertFalse(data['success'])

    def test_director_should_patch_actor(self):
        actor = Actor.query.filter_by(name="Brad", age=45, gender="M").first()
        res = self.client().patch(
            '/actors',
            headers={"Authorization": "Bearer {}".format(self.director_token)},
            json=dict(id=actor.id, name="NewName", age=22, gender="F"))
        data = json.loads(res.data)
        self.assertEqual(res.status_code, 200)
        self.assertTrue(data['success'])
        patched_actor = data['patched']
        self.assertEqual(actor.id, patched_actor.get('id'))
        self.assertEqual("NewName", patched_actor.get('name'))
        self.assertEqual(22, patched_actor.get('age'))
        self.assertEqual("F", patched_actor.get('gender'))

    def test_executive_should_patch_actor(self):
        actor = Actor.query.filter_by(name="Brad", age=45, gender="M").first()
        res = self.client().patch(
            '/actors',
            headers={"Authorization": "Bearer {}".format(self.executive_token)},
            json=dict(id=actor.id, name="NewName", age=22, gender="F"))
        data = json.loads(res.data)
        self.assertEqual(res.status_code, 200)
        self.assertTrue(data['success'])
        patched_actor = data['patched']
        self.assertEqual(actor.id, patched_actor.get('id'))
        self.assertEqual("NewName", patched_actor.get('name'))
        self.assertEqual(22, patched_actor.get('age'))
        self.assertEqual("F", patched_actor.get('gender'))

    def test_assistant_cant_patch_movie(self):
        movie = Movie.query.filter_by(
            title="Once Upon", release_date="2019-10-04 19:09:33.77486").first()
        new_title = "New Title"
        new_release_date = "2020-11-04 19:09:33.77486"
        res = self.client().patch(
            '/movies',
            headers={"Authorization": "Bearer {}".format(self.assistant_token)},
            json=dict(id=movie.id, title=new_title, release_date=new_release_date))
        data = json.loads(res.data)
        self.assertEqual(res.status_code, 401)
        self.assertFalse(data['success'])

    def test_director_should_patch_movie(self):
        movie = Movie.query.filter_by(
            title="Once Upon", release_date="2019-10-04 19:09:33.774860").first()
        new_title = "New Title"
        new_release_date = "2020-11-04 19:09:33.774860"
        res = self.client().patch(
            '/movies',
            headers={"Authorization": "Bearer {}".format(self.director_token)},
            json=dict(id=movie.id, title=new_title, release_date=new_release_date))
        data = json.loads(res.data)
        self.assertEqual(res.status_code, 200)
        self.assertTrue(data['success'])
        newMovie = Movie.query.get(movie.id)
        self.assertEqual(newMovie.title, new_title)
        self.assertEqual(
            newMovie.release_date.strftime("%Y-%m-%d %H:%M:%S.%f"), new_release_date)

    def test_executive_should_patch_movie(self):
        movie = Movie.query.filter_by(
            title="Once Upon", release_date="2019-10-04 19:09:33.774860").first()
        new_title = "New Title"
        new_release_date = "2020-11-04 19:09:33.774860"
        res = self.client().patch(
            '/movies',
            headers={"Authorization": "Bearer {}".format(self.executive_token)},
            json=dict(id=movie.id, title=new_title, release_date=new_release_date))
        data = json.loads(res.data)
        self.assertEqual(res.status_code, 200)
        self.assertTrue(data['success'])
        newMovie = Movie.query.get(movie.id)
        self.assertEqual(newMovie.title, new_title)
        self.assertEqual(
            newMovie.release_date.strftime("%Y-%m-%d %H:%M:%S.%f"), new_release_date)

    def test_assistant_cant_delete_actor(self):
        actor = Actor.query.filter_by(name="Brad", age=45, gender="M").first()
        self.assertIsNotNone(actor)
        res = self.client().delete(
            '/actors/{}'.format(actor.id),
            headers={"Authorization": "Bearer {}".format(self.assistant_token)})
        self.assertEqual(res.status_code, 401)

    def test_director_should_delete_actor(self):
        # Renamed from test_director_cant_delete_actor: the assertions below expect the
        # delete to succeed (200 and a deleted id), so the original name contradicted the body.
        actor = Actor.query.filter_by(name="Brad", age=45, gender="M").first()
        self.assertIsNotNone(actor)
        res = self.client().delete(
            '/actors/{}'.format(actor.id),
            headers={"Authorization": "Bearer {}".format(self.director_token)})
        data = json.loads(res.data)
        self.assertEqual(res.status_code, 200)
        self.assertEqual(data['deleted']['id'], actor.id)

    def test_executive_should_delete_actor(self):
        actor = Actor.query.filter_by(name="Brad", age=45, gender="M").first()
        self.assertIsNotNone(actor)
        res = self.client().delete(
            '/actors/{}'.format(actor.id),
            headers={"Authorization": "Bearer {}".format(self.executive_token)})
        data = json.loads(res.data)
        self.assertEqual(res.status_code, 200)
        self.assertEqual(data['deleted']['id'], actor.id)

    def test_assistant_cant_delete_movie(self):
        movie = Movie.query.filter_by(
            title="Once Upon", release_date="2019-10-04 19:09:33.77486").first()
        self.assertIsNotNone(movie)
        res = self.client().delete(
            '/movies/{}'.format(movie.id),
            headers={"Authorization": "Bearer {}".format(self.assistant_token)})
        self.assertEqual(res.status_code, 401)

    def test_director_cant_delete_movie(self):
        movie = Movie.query.filter_by(
            title="Once Upon", release_date="2019-10-04 19:09:33.77486").first()
        self.assertIsNotNone(movie)
        res = self.client().delete(
            '/movies/{}'.format(movie.id),
            headers={"Authorization": "Bearer {}".format(self.director_token)})
        self.assertEqual(res.status_code, 401)

    def test_executive_should_delete_movie(self):
        movie = Movie.query.filter_by(
            title="Once Upon", release_date="2019-10-04 19:09:33.77486").first()
        self.assertIsNotNone(movie)
        res = self.client().delete(
            '/movies/{}'.format(movie.id),
            headers={"Authorization": "Bearer {}".format(self.executive_token)})
        data = json.loads(res.data)
        self.assertEqual(res.status_code, 200)
        self.assertEqual(data['deleted']['id'], movie.id)

    def test_nonexisting_route(self):
        res = self.client().get('/nonexisting')
        self.assertEqual(res.status_code, 404)
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from models import Actor, Role, ActorRole

engine = create_engine('sqlite:///actor_roles.db')
# Create the tables from the models' own declarative base; a fresh declarative_base()
# here would have empty metadata and create nothing.
Actor.metadata.create_all(engine)

Session = sessionmaker()
Session.configure(bind=engine)
session = Session()

bale = Actor(name="Christian Bale")
hathaway = Actor(name="Anne Hathaway")
pfeiffer = Actor(name="Michelle Pfeiffer")
keaton = Actor(name="Michael Keaton")
arnett = Actor(name="Will Arnett")

batman = Role(character="Batman")
catwoman = Role(character="Catwoman")
burry = Role(character="Dr. Michael Burry")
american_psycho = Role(character="Patrick Bateman")

batman.actors.append(bale)
batman.actors.append(keaton)
batman.actors.append(arnett)
catwoman.actors.append(pfeiffer)
catwoman.actors.append(hathaway)
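# Hypothetical usage sketch: assuming Role.actors is the many-to-many relationship through
# ActorRole implied by the appends above, the objects could be persisted and queried like
# this. The add_all/commit calls and the query are illustrative, not part of the script above.
session.add_all([bale, hathaway, pfeiffer, keaton, arnett,
                 batman, catwoman, burry, american_psycho])
session.commit()

batman_role = session.query(Role).filter_by(character="Batman").first()
print([actor.name for actor in batman_role.actors])  # e.g. Bale, Keaton, Arnett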