def client_generation(self, actor_id, worker_id, tenant):
    """Request an OAuth client for a worker over a ClientsChannel.

    A single request is made with ``self.secret``. The channel is always
    closed before this method returns or raises.

    Args:
        actor_id: id of the actor the worker belongs to.
        worker_id: id of the worker the client is generated for.
        tenant: tenant passed through to the client request.

    Returns:
        A 5-tuple ``(client_id, access_token, refresh_token, api_server,
        client_secret)`` taken from the response message.

    Raises:
        Exception: whatever ``request_client`` raised, re-raised unchanged
            after the actor has been errored out and the worker status set
            to ERROR.
        SpawnerException: if the response message has ``status == 'error'``.
    """
    failure_notice = ("Abaco was unable to generate an OAuth client for a new "
                      "worker for this actor. System administrators have been notified.")
    client_ch = ClientsChannel()
    try:
        client_msg = client_ch.request_client(
            tenant=tenant,
            actor_id=actor_id,
            worker_id=worker_id,
            secret=self.secret,
        )
    except Exception as e:
        # Any exception type is caught here, so the log does not claim a
        # specific exception class.
        logger.error("Got an exception trying to generate a client for "
                     "actor_id: {}; worker_id: {}; exception: {}".format(actor_id, worker_id, e))
        # put the actor and worker in an error state, then propagate the failure
        self.error_out_actor(actor_id, worker_id, failure_notice)
        Worker.update_worker_status(actor_id, worker_id, ERROR)
        logger.critical("Client generation FAILED.")
        # bare raise keeps the original exception and traceback intact
        raise
    finally:
        # runs on success and on every failure path, including when the
        # error bookkeeping above itself raises
        client_ch.close()

    if client_msg.get('status') == 'error':
        logger.error("Error generating client: {}".format(client_msg.get('message')))
        self.error_out_actor(actor_id, worker_id, failure_notice)
        Worker.update_worker_status(actor_id, worker_id, ERROR)
        raise SpawnerException("Error generating client")  # TODO - clean up error message

    # client was generated successfully; only the client id is logged so that
    # the access/refresh tokens never end up in log files.
    logger.info("Got a client: {}".format(client_msg['client_id']))
    return (
        client_msg['client_id'],
        client_msg['access_token'],
        client_msg['refresh_token'],
        client_msg['api_server'],
        client_msg['client_secret'],
    )
def process(self, cmd):
    """Main spawner method for processing a command from the CommandChannel.

    Starts new workers for the actor, optionally stops the existing ones,
    records the new workers, then sends each new worker an 'ok' message over
    its anonymous channel (with OAuth client credentials when client
    generation is enabled and succeeds). Finally the anonymous channels and
    the spawner-worker channels are deleted.

    Args:
        cmd: mapping with required keys ``actor_id``, ``worker_ids``,
            ``image`` and ``tenant``; optional ``stop_existing`` (default
            True) and ``num`` (default ``self.num_workers``).

    Returns:
        None. Returns early if ``start_workers`` raises SpawnerException or
        if a client request times out.
    """
    logger.info("Spawner processing new command:{}".format(cmd))
    actor_id = cmd['actor_id']
    worker_ids = cmd['worker_ids']
    image = cmd['image']
    tenant = cmd['tenant']
    stop_existing = cmd.get('stop_existing', True)
    num_workers = cmd.get('num', self.num_workers)
    # one placeholder per argument, so every value is printed under its own label
    logger.info(
        "command params: actor_id: {} worker_ids: {} image: {} tenant: {} "
        "stop_existing: {} num_workers: {}".format(
            actor_id, worker_ids, image, tenant, stop_existing, num_workers))

    try:
        new_channels, anon_channels, new_workers = self.start_workers(
            actor_id, worker_ids, image, tenant, num_workers)
    except SpawnerException:
        # for now, start_workers will do clean up for a SpawnerException, so we just need
        # to return back to the run loop.
        logger.info("Spawner returning to main run loop.")
        return
    logger.info("Created new workers: {}".format(new_workers))

    # Add workers to the store first so that the records will be there when the
    # workers go to update their status.
    if stop_existing:
        logger.info("Stopping existing workers: {}".format(worker_ids))
        self.stop_workers(actor_id, worker_ids)
        # since we're stopping the existing workers, the actor's collection should just
        # be equal to the new_workers.
        workers_store[actor_id] = new_workers
        logger.info("workers_store set to new_workers: {}.".format(new_workers))
    else:
        # the existing workers stay, so each new worker is added to the
        # actor's collection individually.
        for worker in new_workers.values():
            logger.info("calling add_worker for worker: {}.".format(worker))
            Worker.add_worker(actor_id, worker)

    # Tell new workers to subscribe to the actor channel.
    # If abaco is configured to generate clients for the workers, generate them now
    # and send new workers their clients.
    generate_clients = Config.get('workers', 'generate_clients').lower()
    # new_workers is a dictionary of dictionaries; the idx-th anonymous channel
    # is paired with the idx-th worker in insertion order. Built once here
    # instead of on every loop iteration.
    ordered_workers = list(new_workers.values())
    logger.info("Sending messages to new workers over anonymous channels to subscribe to inbox.")
    for idx, channel in enumerate(anon_channels):
        # default message: worker proceeds without a client
        message = {'status': 'ok',
                   'actor_id': actor_id,
                   'tenant': tenant,
                   'client': 'no'}
        sent_note = "Sent OK message over anonymous worker channel."

        if generate_clients == 'true':
            worker_id = ordered_workers[idx]['id']
            logger.info("Getting client for worker number {}, id: {}".format(idx, worker_id))
            client_ch = ClientsChannel()
            try:
                client_msg = client_ch.request_client(tenant=tenant,
                                                      actor_id=actor_id,
                                                      worker_id=worker_id,
                                                      secret=self.secret)
            except ChannelTimeoutException as e:
                logger.error("Got a ChannelTimeoutException trying to generate a client for "
                             "actor_id: {}; worker_id: {}; exception: {}".format(actor_id, worker_id, e))
                # put actor in an error state and return
                self.error_out_actor(actor_id, worker_id,
                                     "Abaco was unable to generate an OAuth client for a new "
                                     "worker for this actor. System administrators have been notified.")
                # NOTE(review): this early return leaves the remaining anonymous
                # channels and new_channels undeleted - confirm that is intended.
                return
            finally:
                # close on every path, including exceptions other than a timeout
                client_ch.close()

            # we need to ignore errors when generating clients because it's possible it is
            # not set up for a specific tenant. we log it instead.
            if client_msg.get('status') == 'error':
                logger.error("Error generating client: {}".format(client_msg.get('message')))
            else:
                # only the client id is logged; tokens are kept out of the logs
                logger.info("Got a client: {}".format(client_msg['client_id']))
                message.update({
                    'client': 'yes',
                    'client_id': client_msg['client_id'],
                    'client_secret': client_msg['client_secret'],
                    'access_token': client_msg['access_token'],
                    'refresh_token': client_msg['refresh_token'],
                    'api_server': client_msg['api_server'],
                })
                sent_note = "Sent OK message AND client over anonymous worker channel."
        else:
            logger.info("Not generating clients. Config value was: {}".format(generate_clients))

        channel.put(message)
        logger.debug(sent_note)
        # @TODO -
        # delete the anonymous channel from this thread but sleep first to avoid the race condition.
        time.sleep(1.5)
        channel.delete()

    # due to the race condition deleting channels (potentially before all workers have
    # received all messages) we put a sleep here.
    time.sleep(1)
    for ch in new_channels:
        try:
            # the new_channels are the spawnerworker channels so they can be deleted.
            ch.delete()
        except Exception as e:
            # best effort: a failed delete must not prevent deleting the others
            logger.error("Got exception trying to delete spawnerworker channel: {}".format(e))
    logger.info("Done processing command.")
def process(self, cmd):
    """Process a spawner command: start workers and hand them their clients.

    Starts new workers for the actor, optionally stops the existing ones,
    records the new workers, then sends each new worker an 'ok' message over
    its anonymous channel. When the ``workers.generate_clients`` config value
    is 'true', an OAuth client is requested per worker and included in the
    message if the request succeeds.

    Args:
        cmd: mapping with required keys ``actor_id``, ``worker_ids``,
            ``image`` and ``tenant``; optional ``stop_existing`` (default
            True) and ``num`` (default ``self.num_workers``).

    Returns:
        None. Returns early if ``start_workers`` raises SpawnerException.
    """
    print("Processing cmd:{}".format(cmd))
    actor_id = cmd['actor_id']
    worker_ids = cmd['worker_ids']
    image = cmd['image']
    tenant = cmd['tenant']
    stop_existing = cmd.get('stop_existing', True)
    num_workers = cmd.get('num', self.num_workers)
    print("Actor id:{}".format(actor_id))

    try:
        new_channels, anon_channels, new_workers = self.start_workers(
            actor_id, worker_ids, image, tenant, num_workers)
    except SpawnerException:
        # for now, start_workers will do clean up for a SpawnerException, so we just need
        # to return back to the run loop.
        return
    print("Created new workers: {}".format(new_workers))

    # Add workers to the store first so that the records will be there when the
    # workers go to update their status.
    if stop_existing:
        self.stop_workers(actor_id, worker_ids)
        # existing workers are gone, so the collection is exactly the new workers
        workers_store[actor_id] = new_workers
    else:
        for worker in new_workers.values():
            Worker.add_worker(actor_id, worker)

    # Tell new workers to subscribe to the actor channel.
    # If abaco is configured to generate clients for the workers, generate them now
    # and send new workers their clients.
    generate_clients = Config.get('workers', 'generate_clients').lower()
    # new_workers is a dictionary of dictionaries; the idx-th anonymous channel
    # is paired with the idx-th worker in insertion order. Built once here
    # instead of on every loop iteration.
    ordered_workers = list(new_workers.values())
    for idx, channel in enumerate(anon_channels):
        # default message: worker proceeds without a client
        message = {
            'status': 'ok',
            'actor_id': actor_id,
            'tenant': tenant,
            'client': 'no',
        }
        if generate_clients == 'true':
            print("Getting client for worker {}".format(idx))
            client_ch = ClientsChannel()
            try:
                client_msg = client_ch.request_client(
                    tenant=tenant,
                    actor_id=actor_id,
                    worker_id=ordered_workers[idx]['id'],
                    secret=self.secret)
            finally:
                # one channel is opened per worker; release it whether or not
                # the request succeeded
                client_ch.close()

            # we need to ignore errors when generating clients because it's possible it is
            # not set up for a specific tenant. we log it instead.
            if client_msg.get('status') == 'error':
                print("Error generating client: {}".format(client_msg.get('message')))
            else:
                # only the client id is printed; tokens are kept out of stdout
                print("Got a client: {}".format(client_msg['client_id']))
                message.update({
                    'client': 'yes',
                    'client_id': client_msg['client_id'],
                    'client_secret': client_msg['client_secret'],
                    'access_token': client_msg['access_token'],
                    'refresh_token': client_msg['refresh_token'],
                    'api_server': client_msg['api_server'],
                })
        else:
            print("Not generating clients. Config value was: {}".format(generate_clients))
        channel.put(message)
    print("Done processing command.")
def client_generation(self, actor_id, worker_id, tenant):
    """Request an OAuth client for a worker, retrying on failure.

    Up to 10 attempts are made, sleeping 2 seconds before every attempt after
    the first. A fresh ClientsChannel is opened for each attempt and closed
    again before the next one. An attempt fails either because
    ``request_client`` raises or because the response has
    ``status == 'error'``; a response whose message contains
    'AgaveClientFailedDoNotRetry' ends the retries immediately.

    Update - 4/2020: the actor is NOT put in an error state when client
    generation fails, because this is not something the user has control
    over. The worker is killed instead.

    Args:
        actor_id: id of the actor the worker belongs to.
        worker_id: id of the worker the client is generated for.
        tenant: tenant passed through to the client request.

    Returns:
        A 5-tuple ``(client_id, access_token, refresh_token, api_server,
        client_secret)`` taken from the response message.

    Raises:
        Exception: whatever ``request_client`` raised on the final attempt,
            re-raised unchanged after the worker has been killed.
        SpawnerException: if the final (or a non-retryable) response has
            ``status == 'error'``; the worker is killed first.
    """
    max_attempts = 10
    retry_delay = 2  # seconds between attempts

    for attempt in range(1, max_attempts + 1):
        last_attempt = attempt == max_attempts
        # take a break between each subsequent attempt after the first one:
        if attempt > 1:
            time.sleep(retry_delay)
        client_ch = ClientsChannel()
        logger.debug(f"trying to generate a client for worker {worker_id}; attempt: {attempt}.")
        try:
            client_msg = client_ch.request_client(tenant=tenant,
                                                  actor_id=actor_id,
                                                  worker_id=worker_id,
                                                  secret=self.secret)
        except Exception as e:
            # Any exception type is caught here, so the log does not claim a
            # specific exception class.
            logger.error("Got an exception trying to generate a client for "
                         "actor_id: {}; worker_id: {}; exception: {}".format(actor_id, worker_id, e))
            if last_attempt:
                self.kill_worker(actor_id, worker_id)
                logger.critical("Client generation FAILED.")
                # bare raise: re-raises the request failure itself
                raise
            # no response was received on this attempt, so there is nothing to
            # inspect below - go straight to the next attempt
            continue
        finally:
            # the channel is per-attempt; closing is best effort and must not
            # mask the outcome of the request
            try:
                client_ch.close()
            except Exception as close_err:
                logger.debug(f"got exception trying to close the client_ch: {close_err}")

        if client_msg.get('status') != 'error':
            # client was generated successfully; only the client id is logged so
            # that the access/refresh tokens never end up in log files.
            logger.info("Got a client: {}".format(client_msg['client_id']))
            return (
                client_msg['client_id'],
                client_msg['access_token'],
                client_msg['refresh_token'],
                client_msg['api_server'],
                client_msg['client_secret'],
            )

        error_text = client_msg.get('message')
        logger.error("Error generating client; worker_id: {}; message: {}".format(worker_id, error_text))
        # check to see if the error was an error that cannot be retried; a
        # missing message is treated as retryable rather than crashing on `in`.
        give_up = 'AgaveClientFailedDoNotRetry' in (error_text or '')
        if give_up:
            logger.debug(f"got AgaveClientFailedDoNotRetry in message for worker {worker_id}. "
                         f"Giving up and setting attempts directly to 10.")
        if give_up or last_attempt:
            self.kill_worker(actor_id, worker_id)
            raise SpawnerException("Error generating client")  # TODO - clean up error message
    # Not reached: the final attempt always returns or raises above.
def process(self, cmd):
    """Process a spawner command: start workers and send them their clients.

    Starts new workers for the actor, optionally stops the existing ones,
    records the new workers, then requests an OAuth client for each new
    worker and sends it an 'ok' message over its anonymous channel (with the
    client credentials if the request succeeded).

    Args:
        cmd: mapping with required keys ``actor_id``, ``image`` and
            ``tenant``; optional ``stop_existing`` (default True) and ``num``
            (default ``self.num_workers``).

    Returns:
        None. Returns early if ``start_workers`` raises SpawnerException.
    """
    print("Processing cmd:{}".format(cmd))
    actor_id = cmd['actor_id']
    image = cmd['image']
    tenant = cmd['tenant']
    stop_existing = cmd.get('stop_existing', True)
    num_workers = cmd.get('num', self.num_workers)
    print("Actor id:{}".format(actor_id))

    try:
        new_channels, anon_channels, new_workers = self.start_workers(
            actor_id, image, tenant, num_workers)
    except SpawnerException:
        # for now, start_workers will do clean up for a SpawnerException, so we just need
        # to return back to the run loop.
        return
    print("Created new workers: {}".format(new_workers))

    # Add workers to the store first so that the records will be there when the
    # workers go to update their status.
    if stop_existing:
        self.stop_workers(actor_id)
        # existing workers are gone, so the collection is exactly the new workers
        workers_store[actor_id] = new_workers
    else:
        for worker in new_workers.values():
            Worker.add_worker(actor_id, worker)

    # send new workers their clients and tell them to subscribe to the actor channel.
    # new_workers is a dictionary of dictionaries; the idx-th anonymous channel
    # is paired with the idx-th worker in insertion order. Built once here
    # instead of on every loop iteration.
    ordered_workers = list(new_workers.values())
    for idx, channel in enumerate(anon_channels):
        print("Getting client for worker {}".format(idx))
        client_ch = ClientsChannel()
        try:
            client_msg = client_ch.request_client(
                tenant=tenant,
                actor_id=actor_id,
                worker_id=ordered_workers[idx]['ch_name'],
                secret=self.secret)
        finally:
            # one channel is opened per worker; release it whether or not the
            # request succeeded
            client_ch.close()

        # default message: worker proceeds without a client
        message = {'status': 'ok',
                   'actor_id': actor_id,
                   'tenant': tenant,
                   'client': 'no'}
        # we need to ignore errors when generating clients because it's possible it is
        # not set up for a specific tenant. we log it instead.
        if client_msg.get('status') == 'error':
            print("Error generating client: {}".format(client_msg.get('message')))
        else:
            # only the client id is printed; tokens are kept out of stdout
            print("Got a client: {}".format(client_msg['client_id']))
            message.update({
                'client': 'yes',
                'client_id': client_msg['client_id'],
                'client_secret': client_msg['client_secret'],
                'access_token': client_msg['access_token'],
                'refresh_token': client_msg['refresh_token'],
                'api_server': client_msg['api_server'],
            })
        channel.put(message)
    print("Done processing command.")