def list_laboratories_addresses(self):
    """Group every registered experiment instance by laboratory address.

    Returns a dictionary shaped as::

        { laboratory_coord_address : { experiment_instance_id : resource } }

    built from the experiment types, their instance names and the
    per-instance hashes read through the Redis client.
    """
    redis_client = self._redis_maker()
    addresses = {}

    for exp_type in redis_client.smembers(WEBLAB_EXPERIMENT_TYPES):
        parsed_id = ExperimentId.parse(exp_type)
        instance_names = redis_client.smembers(WEBLAB_EXPERIMENT_INSTANCES % exp_type)

        for instance_name in instance_names:
            instance_id = ExperimentInstanceId(instance_name, parsed_id.exp_name, parsed_id.cat_name)
            instance_key = WEBLAB_EXPERIMENT_INSTANCE % (exp_type, instance_name)

            lab_address = redis_client.hget(instance_key, LAB_COORD)
            parsed_resource = Resource.parse(redis_client.hget(instance_key, RESOURCE_INST))

            # One inner dictionary per laboratory, created on first use
            addresses.setdefault(lab_address, {})[instance_id] = parsed_resource

    return addresses
def get_experiment_id(self, reservation_id):
    """Return the ExperimentId parsed from the data stored for a reservation.

    Raises CoordExc.ExpiredSessionError when ``get_reservation_data``
    returns None for ``reservation_id``.
    """
    data = self.get_reservation_data(reservation_id)
    if data is not None:
        return ExperimentId.parse(data[EXPERIMENT_TYPE])

    message = "Expired reservation: no experiment id found for that reservation (%s)" % reservation_id
    raise CoordExc.ExpiredSessionError(message)
def reserve_experiment(self, reservation_id, experiment_id, time, priority, initialization_in_accounting, client_initial_data, request_info):
    """Forward a reservation to the external server and record the link in Redis.

    Logs in on the external server, requests the (possibly remapped)
    experiment there and, when a reservation is granted, queues two hash
    entries keyed by the local ``reservation_id``: the remote reservation
    with its pickled cookies and start time, and the data kept for the
    later retrieval of pending results.

    Returns None when the external reservation is null; otherwise the
    value of ``self._convert_reservation_to_status`` for it.
    """
    # This core server is appended to the chain of servers already traversed
    traversed_servers = list(request_info.get(SERVER_UUIDS, []))
    traversed_servers.append((self.core_server_uuid, self.core_server_uuid_human))

    consumer_data = {
        'time_allowed' : time,
        'priority' : priority,
        'initialization_in_accounting' : initialization_in_accounting,
        'external_user' : request_info.get('username', ''),
        SERVER_UUIDS : traversed_servers,
    }
    # Copy through only the forwarded keys that the request actually carries
    consumer_data.update(
        (key, request_info[key]) for key in FORWARDED_KEYS if key in request_info
    )

    # TODO: identifier of the server
    login_client = self._create_login_client()
    session_id = login_client.login(self.username, self.password)
    client = self._create_client(login_client.get_cookies())

    initial_data_json = json.dumps(client_initial_data)
    consumer_data_json = json.dumps(consumer_data)

    # The administrator may have mapped this experiment id to a different
    # one; otherwise the very same id is requested.
    local_experiment_str = experiment_id.to_weblab_str()
    remote_experiment_str = self.experiments_map.get(local_experiment_str, local_experiment_str)
    remote_experiment_id = ExperimentId.parse(remote_experiment_str)

    external_reservation = client.reserve_experiment(session_id, remote_experiment_id, initial_data_json, consumer_data_json)
    if external_reservation.is_null():
        return None

    remote_reservation_id = external_reservation.reservation_id.id
    link_message = "Local reservation_id %s is linked to remote reservation %s" % (reservation_id, remote_reservation_id)
    log.log(ExternalWebLabDeustoScheduler, log.level.Warning, link_message)

    pickled_cookies = pickle.dumps(client.get_cookies())

    pipeline = self.redis_maker().pipeline()

    reservation_entry = {
        'remote_reservation_id' : remote_reservation_id,
        'cookies' : pickled_cookies,
        'start_time' : time_mod.time(),
    }
    pipeline.hset(self.external_weblabdeusto_reservations, reservation_id, json.dumps(reservation_entry))

    pending_results_key = self.EXTERNAL_WEBLABDEUSTO_PENDING_RESULTS % (self.resource_type_name, self.core_server_route)
    pending_entry = {
        'remote_reservation_id' : remote_reservation_id,
        'username' : request_info.get('username',''),
        'serialized_request_info' : pickle.dumps(request_info),
        'experiment_id_str' : experiment_id.to_weblab_str(),
    }
    pipeline.hset(pending_results_key, reservation_id, json.dumps(pending_entry))

    # Both entries are sent together
    pipeline.execute()

    return self._convert_reservation_to_status(external_reservation, reservation_id, remote_reservation_id)
def _initial_clean(self, coordination_configuration_parser):
    """Register every configured external server with each of its resource types.

    The parser's ``parse_external_servers()`` result is walked key by key;
    each key is parsed as an ExperimentId and handed, together with each
    resource type name listed under it, to
    ``self.resources_manager.add_experiment_id``.
    """
    servers = coordination_configuration_parser.parse_external_servers()
    for server_str in servers:
        for resource_type in servers[server_str]:
            external_experiment_id = ExperimentId.parse(server_str)
            self.resources_manager.add_experiment_id(external_experiment_id, resource_type)
def _initial_clean(self, coordination_configuration_parser):
    """Register every configured external server with each of its resource types.

    A session is created for the whole operation: each
    (experiment id, resource type name) pair is passed with that session
    to ``self.resources_manager.add_experiment_id``, the session is
    committed once all pairs are added, and it is closed in every case.
    """
    db_session = self._session_maker()
    try:
        servers = coordination_configuration_parser.parse_external_servers()
        for server_str in servers:
            for resource_type in servers[server_str]:
                external_experiment_id = ExperimentId.parse(server_str)
                self.resources_manager.add_experiment_id(db_session, external_experiment_id, resource_type)

        db_session.commit()
    finally:
        db_session.close()
def _process(self):
    """Collect the results of pending external reservations that are no longer alive.

    Reads every entry of the ``self.external_weblabdeusto_pending`` hash,
    asks the remote server for the uses of those reservations and, for
    each one that is not alive anymore, removes the pending entry. If the
    use is finished it is put in ``self.completed_store`` together with a
    callback that deletes the post-reservation data of that reservation;
    otherwise the cancellation is only logged.

    Returns None. If the client cannot be created because of a
    ``urllib2.URLError``, nothing is processed in this run.
    """
    redis_client = self.redis_maker()

    pending_results = []
    for reservation_id in redis_client.hkeys(self.external_weblabdeusto_pending):
        pending_result_str = redis_client.hget(self.external_weblabdeusto_pending, reservation_id)
        if pending_result_str is not None:
            pending_result = json.loads(pending_result_str)
            pending_result['reservation_id'] = reservation_id
            pending_results.append(pending_result)

    if not pending_results:
        return

    try:
        session_id, client = self.create_client_func(None)
    except urllib2.URLError:
        # Remote server is down, try later
        return

    remote_reservation_ids = [ SessionId(pending_result['remote_reservation_id']) for pending_result in pending_results ]

    results = client.get_experiment_uses_by_id(session_id, remote_reservation_ids)

    for pending_result, result in zip(pending_results, results):
        if result.is_alive():
            continue

        username = pending_result['username']
        try:
            # NOTE(review): pickle.loads is only safe while the store this
            # value is read back from is trusted.
            request_info = pickle.loads(pending_result['serialized_request_info'].encode('utf-8'))
        except Exception as e:
            log.log(ResultsRetriever, log.level.Critical, "Probably serialized_request_info was truncated in %s" % pending_result)
            log.log_exc(ResultsRetriever, log.level.Error)
            request_info = {'error' : 'could not be stored: %s' % e}

        reservation_id = pending_result['reservation_id']
        remote_reservation_id = pending_result['remote_reservation_id']

        # hdel reports whether the field existed; a falsy result means
        # the entry was already removed by someone else.
        if not redis_client.hdel(self.external_weblabdeusto_pending, reservation_id):
            log.log(ResultsRetriever, log.level.Info, "Pending reservation %r not found. Assuming it is managed by other thread" % pending_result)
            continue

        if result.is_finished():
            use = result.experiment_use
            use.experiment_id = ExperimentId.parse(pending_result['experiment_id_str'])
            use.remote_reservation_id = remote_reservation_id
            use.reservation_id = reservation_id

            # Keep only plain string / numeric values; iterate over a copy
            # of the keys since entries are removed while looping.
            for key in list(request_info):
                if not isinstance(request_info[key], (basestring, numbers.Number)):
                    request_info.pop(key)
            use.request_info = request_info

            # Bind the current reservation_id as a default argument: a plain
            # closure would look the name up when the callback is finally
            # invoked, by which time the loop may have moved on to another
            # reservation.
            callback = lambda reservation_id=reservation_id: self.post_reservation_data_manager.delete(reservation_id)
            self.completed_store.put(username, use, callback)
        else:
            log.log(ResultsRetriever, log.level.Info, "Reservation id %s was cancelled and therefore not stored" % reservation_id)
def reserve_experiment(self, reservation_id, experiment_id, time, priority, initialization_in_accounting, client_initial_data, request_info):
    """Forward a reservation to the external server and persist the link.

    Logs in on the external server, requests the (possibly remapped)
    experiment there and, when a reservation is granted, adds two objects
    to a session and commits them: the external reservation (with its
    pickled cookies and start time) and the pending-results record.

    Returns None when the external reservation is null; otherwise the
    value of ``self._convert_reservation_to_status`` for it.
    """
    # This core server is appended to the chain of servers already traversed
    traversed_servers = list(request_info.get(SERVER_UUIDS, []))
    traversed_servers.append((self.core_server_uuid, self.core_server_uuid_human))

    external_user = request_info.get('username', '')
    consumer_data = {
        'time_allowed' : time,
        'priority' : priority,
        'initialization_in_accounting' : initialization_in_accounting,
        'external_user' : external_user,
        SERVER_UUIDS : traversed_servers,
    }
    # Copy through only the forwarded keys that the request actually carries
    consumer_data.update(
        (key, request_info[key]) for key in FORWARDED_KEYS if key in request_info
    )
    # Fall back to the plain username when no unique one was provided
    consumer_data['external_user_unique'] = request_info.get('username_unique', external_user)

    # TODO: identifier of the server
    login_client = self._create_login_client()
    session_id = login_client.login(self.username, self.password)
    client = self._create_client(login_client.get_cookies())

    initial_data_json = json.dumps(client_initial_data)
    consumer_data_json = json.dumps(consumer_data)

    # The administrator may have mapped this experiment id to a different
    # one; otherwise the very same id is requested.
    local_experiment_str = experiment_id.to_weblab_str()
    remote_experiment_str = self.experiments_map.get(local_experiment_str, local_experiment_str)
    remote_experiment_id = ExperimentId.parse(remote_experiment_str)

    external_reservation = client.reserve_experiment(session_id, remote_experiment_id, initial_data_json, consumer_data_json)
    if external_reservation.is_null():
        return None

    remote_reservation_id = external_reservation.reservation_id.id
    link_message = "Local reservation_id %s is linked to remote reservation %s" % (reservation_id, remote_reservation_id)
    log.log(ExternalWebLabDeustoScheduler, log.level.Info, link_message)

    pickled_cookies = pickle.dumps(client.get_cookies())

    db_session = self.session_maker()
    try:
        reservation_row = ExternalWebLabDeustoReservation(
            reservation_id,
            remote_reservation_id,
            pickled_cookies,
            time_mod.time())
        pending_row = ExternalWebLabDeustoReservationPendingResults(
            reservation_id,
            remote_reservation_id,
            self.resource_type_name,
            self.core_server_route,
            request_info.get('username', ''),
            pickle.dumps(request_info),
            experiment_id.to_weblab_str())

        db_session.add(reservation_row)
        db_session.add(pending_row)
        db_session.commit()
    finally:
        db_session.close()

    return self._convert_reservation_to_status(external_reservation, reservation_id, remote_reservation_id)
def _initial_clean(self, coordination_configuration_parser):
    """Register every configured external server with each of its resource types.

    A session is created for the whole operation: each
    (experiment id, resource type name) pair is passed with that session
    to ``self.resources_manager.add_experiment_id``, the session is
    committed once all pairs are added, and it is closed in every case.
    """
    db_session = self._session_maker()
    try:
        servers = coordination_configuration_parser.parse_external_servers()
        for server_str in servers:
            for resource_type in servers[server_str]:
                external_experiment_id = ExperimentId.parse(server_str)
                self.resources_manager.add_experiment_id(db_session, external_experiment_id, resource_type)

        db_session.commit()
    finally:
        db_session.close()
def direct2experiment():
    """Reserve the requested experiment and redirect to the federated client page.

    Reads the ``EXPERIMENT_ID`` and ``SESSION_ID`` request arguments. If
    either is missing, a plain message naming the missing argument is
    returned. Otherwise the session id is stored in the API context, the
    experiment is reserved and the response is a redirect whose fragment
    carries the new reservation id. If the reservation raises, the
    traceback is printed and ``HTML_ERROR_TEMPLATE`` is returned.
    """
    experiment_id_str = get_argument(EXPERIMENT_ID)
    if experiment_id_str is None:
        return "%s argument is missing" % EXPERIMENT_ID

    session_id_str = get_argument(SESSION_ID)
    if session_id_str is None:
        # Name the argument that is actually missing (the message used to
        # blame EXPERIMENT_ID here as well).
        return "%s argument is missing" % SESSION_ID

    experiment_id = ExperimentId.parse(experiment_id_str)
    weblab_api.context.session_id = session_id_str

    try:
        reservation_id = weblab_api.api.reserve_experiment(experiment_id, "{}", "{}")
    except Exception:
        traceback.print_exc()
        return HTML_ERROR_TEMPLATE

    new_location = "../../client/federated.html#reservation_id=%s" % reservation_id.reservation_id.id
    return redirect(new_location)
def direct2experiment():
    """Reserve the requested experiment and redirect to the federated client page.

    Reads the ``EXPERIMENT_ID`` and ``SESSION_ID`` request arguments. If
    either is missing, a plain message naming the missing argument is
    returned. Otherwise the session id is stored in the API context, the
    experiment is reserved and the response is a redirect whose fragment
    carries the new reservation id. If the reservation raises, the
    traceback is printed and ``HTML_ERROR_TEMPLATE`` is returned.
    """
    experiment_id_str = get_argument(EXPERIMENT_ID)
    if experiment_id_str is None:
        return "%s argument is missing" % EXPERIMENT_ID

    session_id_str = get_argument(SESSION_ID)
    if session_id_str is None:
        # Name the argument that is actually missing (the message used to
        # blame EXPERIMENT_ID here as well).
        return "%s argument is missing" % SESSION_ID

    experiment_id = ExperimentId.parse(experiment_id_str)
    weblab_api.context.session_id = session_id_str

    try:
        reservation_id = weblab_api.api.reserve_experiment(
            experiment_id, "{}", "{}")
    except Exception:
        traceback.print_exc()
        return HTML_ERROR_TEMPLATE

    new_location = "../../client/federated.html#reservation_id=%s" % reservation_id.reservation_id.id
    return redirect(new_location)
def run(self):
    """Reserve the requested experiment and answer with a 302 to the federated page.

    Reads the ``EXPERIMENT_ID`` and ``SESSION_ID`` arguments. If either is
    missing, a plain message naming the missing argument is returned.
    Otherwise the experiment is reserved through ``self.server`` using the
    caller's IP address; on success the status is set to 302, a
    ``Location`` header pointing at the federated client page is added
    and a small HTML body linking to it is returned. If the reservation
    raises, ``HTML_ERROR_TEMPLATE`` is returned.
    """
    experiment_id_str = self.get_argument(EXPERIMENT_ID)
    if experiment_id_str is None:
        return "%s argument is missing" % EXPERIMENT_ID

    session_id_str = self.get_argument(SESSION_ID)
    if session_id_str is None:
        # Name the argument that is actually missing (the message used to
        # blame EXPERIMENT_ID here as well).
        return "%s argument is missing" % SESSION_ID

    experiment_id = ExperimentId.parse(experiment_id_str)
    session_id = SessionId(session_id_str)

    address = RemoteFacadeContext.get_context().get_ip_address()
    client_address = ClientAddress.ClientAddress(address)

    try:
        reservation_id = self.server.reserve_experiment(session_id, experiment_id, "{}", "{}", client_address)
    except Exception:
        return HTML_ERROR_TEMPLATE

    new_location = "../../client/federated.html#reservation_id=%s" % reservation_id.reservation_id.id
    self.set_status(302)
    self.add_other_header('Location', new_location)
    # Fallback body for clients that do not follow the redirect
    return """<html><body><a href="%s">Click here</a></body></html>""" % new_location
def run(self):
    """Reserve the requested experiment and answer with a 302 to the federated page.

    Reads the ``EXPERIMENT_ID`` and ``SESSION_ID`` arguments. If either is
    missing, a plain message naming the missing argument is returned.
    Otherwise the experiment is reserved through ``self.server`` using the
    caller's IP address; on success the status is set to 302, a
    ``Location`` header pointing at the federated client page is added
    and a small HTML body linking to it is returned. If the reservation
    raises, ``HTML_ERROR_TEMPLATE`` is returned.
    """
    experiment_id_str = self.get_argument(EXPERIMENT_ID)
    if experiment_id_str is None:
        return "%s argument is missing" % EXPERIMENT_ID

    session_id_str = self.get_argument(SESSION_ID)
    if session_id_str is None:
        # Name the argument that is actually missing (the message used to
        # blame EXPERIMENT_ID here as well).
        return "%s argument is missing" % SESSION_ID

    experiment_id = ExperimentId.parse(experiment_id_str)
    session_id = SessionId(session_id_str)

    address = RemoteFacadeContext.get_context().get_ip_address()
    client_address = ClientAddress.ClientAddress(address)

    try:
        reservation_id = self.server.reserve_experiment(
            session_id, experiment_id, "{}", "{}", client_address)
    except Exception:
        return HTML_ERROR_TEMPLATE

    new_location = "../../client/federated.html#reservation_id=%s" % reservation_id.reservation_id.id
    self.set_status(302)
    self.add_other_header('Location', new_location)
    # Fallback body for clients that do not follow the redirect
    return """<html><body><a href="%s">Click here</a></body></html>""" % new_location
def list_laboratories_addresses(self):
    """Group every registered experiment instance by laboratory address.

    Returns a dictionary shaped as::

        { laboratory_coord_address : { experiment_instance_id : resource } }

    built from the experiment types, their instance names and the
    per-instance hashes read through the Redis client.
    """
    redis_client = self._redis_maker()
    addresses = {}

    for exp_type in redis_client.smembers(WEBLAB_EXPERIMENT_TYPES):
        parsed_id = ExperimentId.parse(exp_type)
        instance_names = redis_client.smembers(WEBLAB_EXPERIMENT_INSTANCES % exp_type)

        for instance_name in instance_names:
            instance_id = ExperimentInstanceId(instance_name, parsed_id.exp_name, parsed_id.cat_name)
            instance_key = WEBLAB_EXPERIMENT_INSTANCE % (exp_type, instance_name)

            lab_address = redis_client.hget(instance_key, LAB_COORD)
            parsed_resource = Resource.parse(redis_client.hget(instance_key, RESOURCE_INST))

            # One inner dictionary per laboratory, created on first use
            addresses.setdefault(lab_address, {})[instance_id] = parsed_resource

    return addresses
def _update_queues(self):
    """Promote waiting reservations of this resource type to free instances.

    Repeatedly takes the first waiting reservation that has not been
    tried yet in this call and attempts to assign it to one of the free
    resource instances. An assignment requires, in this order: the
    instance is working, the reservation is confirmed, the resource is
    acquired, the reservation data still exists, and the instance hosts
    the requested experiment. When a later step fails, the earlier steps
    are undone before trying the next instance or the next reservation.
    On success the reservation data is updated, the reservation is removed
    from the sorted queue and a confirmation is enqueued in
    ``self.confirmer``.

    Returns None, as soon as there is no untried waiting reservation or
    no free instance.
    """
    ###########################################################
    # There are reasons why a waiting reservation may not be
    # able to be promoted while the next one is. For instance,
    # if a user is waiting for "pld boards", but only for
    # instances of "pld boards" which have a "ud-binary@Binary
    # experiments" server running. If only a "ud-pld@PLD
    # Experiments" is available, then this user will not be
    # promoted and the another user which is waiting for a
    # "ud-pld@PLD Experiments" can be promoted.
    #
    # Therefore, we have a list of the IDs of the waiting
    # reservations we previously thought that they couldn't be
    # promoted in this iteration. They will have another
    # chance in the next run of _update_queues.
    #
    previously_waiting_reservation_ids = []

    weblab_resource_pqueue_map = WEBLAB_RESOURCE_PQUEUE_MAP % self.resource_type_name
    weblab_resource_pqueue_sorted = WEBLAB_RESOURCE_PQUEUE_SORTED % self.resource_type_name
    weblab_resource_slots = WEBLAB_RESOURCE_SLOTS % self.resource_type_name

    ###########################################################
    # While there are free instances and waiting reservations,
    # take the first waiting reservation and set it to current
    # reservation. Make this repeatedly because we want to
    # commit each change
    #
    while True:
        client = self.redis_maker()

        # Ask for one element more than the number of reservations already
        # tried, so that at least one untried reservation is returned if
        # any is still waiting.
        filled_waiting_reservation_ids = client.zrangebyscore(
            weblab_resource_pqueue_sorted, -10000, +10000, start=0,
            num=len(previously_waiting_reservation_ids) + 1)

        first_waiting_reservation_id = None
        for filled_waiting_reservation_id in filled_waiting_reservation_ids:
            # The reservation id is whatever follows the first '_' of the
            # stored member.
            waiting_reservation_id = filled_waiting_reservation_id[
                filled_waiting_reservation_id.find('_') + 1:]
            if waiting_reservation_id not in previously_waiting_reservation_ids:
                first_waiting_reservation_id = waiting_reservation_id
                break

        if first_waiting_reservation_id is None:
            return # There is no waiting reservation for this resource that we haven't already tried

        previously_waiting_reservation_ids.append(
            first_waiting_reservation_id)

        #
        # For the current resource_type, let's ask for
        # all the resource instances available (i.e. those
        # who are a member on weblab:resource:%s:slots )
        #
        free_instances = [
            Resource(self.resource_type_name, resource_instance)
            for resource_instance in client.smembers(weblab_resource_slots)
        ]

        if len(free_instances) == 0:
            # If there is no free instance, just return
            return

        #
        # Select the correct free_instance for the current student among
        # all the free_instances
        #
        if self.randomize_instances:
            randomized_free_instances = [
                free_instance for free_instance in free_instances
            ]
            random.shuffle(randomized_free_instances)
        else:
            # Deterministic order: by resource type, then by resource instance
            randomized_free_instances = sorted(
                free_instances,
                cmp=lambda r1, r2: cmp(r1.resource_type, r2.resource_type) or cmp(r1.resource_instance, r2.resource_instance))

        for free_instance in randomized_free_instances:
            #
            # IMPORTANT: from here on every "continue" should first revoke the
            # reservations_manager and resources_manager confirmations
            #
            working = self.resources_manager.check_working(free_instance)
            if not working:
                # The instance is not working
                continue

            confirmed = self.reservations_manager.confirm(
                first_waiting_reservation_id)
            if not confirmed:
                # student has already been confirmed somewhere else, so don't try with other
                # instances, but rather with other student
                break

            acquired = self.resources_manager.acquire_resource(
                free_instance)
            # print "ACQUIRED", free_instance, acquired, time.time()
            if not acquired:
                # the instance has been acquired by someone else. unconfirm student and
                # try again with other free_instance
                self.reservations_manager.downgrade_confirmation(
                    first_waiting_reservation_id)
                continue

            weblab_resource_pqueue_instance_reservations = WEBLAB_RESOURCE_PQUEUE_INSTANCE_RESERVATIONS % (
                self.resource_type_name, free_instance.resource_instance)
            client.sadd(weblab_resource_pqueue_instance_reservations,
                        first_waiting_reservation_id)

            weblab_reservation_pqueue = WEBLAB_RESOURCE_RESERVATION_PQUEUE % (
                self.resource_type_name, first_waiting_reservation_id)
            pqueue_reservation_data_str = client.get(
                weblab_reservation_pqueue)
            reservation_data = self.reservations_manager.get_reservation_data(
                first_waiting_reservation_id)
            if pqueue_reservation_data_str is None or reservation_data is None:
                # the student is not here anymore; downgrading confirmation is not required
                # but releasing the resource is; and skip the rest of the free instances
                self.resources_manager.release_resource(free_instance)
                client.srem(weblab_resource_pqueue_instance_reservations,
                            first_waiting_reservation_id)
                break

            pqueue_reservation_data = json.loads(
                pqueue_reservation_data_str)

            start_time = self.time_provider.get_time()
            total_time = pqueue_reservation_data[TIME]
            pqueue_reservation_data[START_TIME] = start_time

            pqueue_reservation_data[TIMESTAMP_BEFORE] = start_time
            pqueue_reservation_data[
                ACTIVE_STATUS] = STATUS_WAITING_CONFIRMATION
            pqueue_reservation_data[
                RESOURCE_INSTANCE] = free_instance.to_weblab_str()
            initialization_in_accounting = pqueue_reservation_data[
                INITIALIZATION_IN_ACCOUNTING]

            client_initial_data = reservation_data[CLIENT_INITIAL_DATA]
            request_info = json.loads(reservation_data[REQUEST_INFO])
            username = request_info.get('username')
            locale = request_info.get('locale')

            requested_experiment_type = ExperimentId.parse(
                reservation_data[EXPERIMENT_TYPE])

            selected_experiment_instance = None
            experiment_instances = self.resources_manager.list_experiment_instance_ids_by_resource(
                free_instance)
            # There is no "break" in this loop: if several experiment
            # instances match, the last one is the one selected.
            for experiment_instance in experiment_instances:
                if experiment_instance.to_experiment_id(
                ) == requested_experiment_type:
                    selected_experiment_instance = experiment_instance

            if selected_experiment_instance is None:
                # This resource is not valid for this user, other free_instance should be
                # selected. Try with other, but first clean the acquired resources
                self.reservations_manager.downgrade_confirmation(
                    first_waiting_reservation_id)
                self.resources_manager.release_resource(free_instance)
                client.srem(weblab_resource_pqueue_instance_reservations,
                            first_waiting_reservation_id)
                continue

            pqueue_reservation_data[
                EXPERIMENT_INSTANCE] = selected_experiment_instance.to_weblab_str(
                )
            laboratory_coord_address = self.resources_manager.get_laboratory_coordaddress_by_experiment_instance_id(
                selected_experiment_instance)
            pqueue_reservation_data[LAB_COORD] = laboratory_coord_address
            client.set(weblab_reservation_pqueue,
                       json.dumps(pqueue_reservation_data))

            # The reservation is not waiting anymore: remove its member
            # from the sorted queue.
            filled_reservation_id = client.hget(
                weblab_resource_pqueue_map, first_waiting_reservation_id)
            client.zrem(weblab_resource_pqueue_sorted,
                        filled_reservation_id)

            #
            # Enqueue the confirmation, since it might take a long time
            # (for instance, if the laboratory server does not reply because
            # of any network problem, or it just takes too much in replying),
            # so this method might take too long. That's why we enqueue these
            # petitions and run them in other threads.
            #
            deserialized_server_initial_data = {
                'priority.queue.slot.length': '%s' % total_time,
                'priority.queue.slot.start': '%s' % datetime.datetime.fromtimestamp(start_time),
                'priority.queue.slot.initialization_in_accounting': initialization_in_accounting,
                'request.experiment_id.experiment_name': selected_experiment_instance.exp_name,
                'request.experiment_id.category_name': selected_experiment_instance.cat_name,
                'request.username': username,
                'request.full_name': username,
                'request.locale': locale,
                # TODO: add the username and user full name here
            }
            server_initial_data = json.dumps(
                deserialized_server_initial_data)
            # server_initial_data will contain information such as "what was the last experiment used?".
            # If a single resource was used by a binary experiment, then the next time may not require reprogramming the device
            self.confirmer.enqueue_confirmation(
                laboratory_coord_address, first_waiting_reservation_id,
                selected_experiment_instance, client_initial_data,
                server_initial_data, self.resource_type_name)
            #
            # After it, keep in the while True in order to add the next
            # reservation
            #
            break
def __init__(self, data_manager, locator, cfg_manager, ConfirmerClass):
    """Build the coordinator: stores, managers, schedulers and aggregators.

    :param data_manager: kept as ``self._data_manager`` and handed to every
        scheduler and aggregator.
    :param locator: kept as ``self.locator`` and passed to the confirmer.
    :param cfg_manager: configuration source; read for the core server
        URL, the notification flag, the scheduling systems and the
        optional aggregators configuration.
    :param ConfirmerClass: class used to build ``self.confirmer``; when
        None, ``Confirmer.ReservationConfirmer`` is used.
    :raises CoordExc.UnregisteredSchedulingSystemError: if a configured
        scheduling system is not in ``self.SCHEDULING_SYSTEMS``.
    :raises Exception: if an experiment id refers to a resource type that
        has no scheduler.
    """
    if ConfirmerClass is None:
        ConfirmerClass = Confirmer.ReservationConfirmer

    self.cfg_manager = cfg_manager
    self._data_manager = data_manager

    self.core_server_url = self.cfg_manager.get_value(CORE_SERVER_URL)

    self.notifier = AdminNotifier.AdminNotifier(self.cfg_manager)
    self.notifications_enabled = self.cfg_manager.get_value(RESOURCES_CHECKER_NOTIFICATIONS_ENABLED, DEFAULT_RESOURCES_CHECKER_NOTIFICATIONS_ENABLED)

    self.locator = locator # Used by ResourcesChecker
    self.confirmer = ConfirmerClass(self, locator)

    self.time_provider = self.CoordinatorTimeProvider()

    self.initial_store = TemporalInformationStore.InitialTemporalInformationStore()
    self.finished_store = TemporalInformationStore.FinishTemporalInformationStore()
    self.completed_store = TemporalInformationStore.CompletedInformationStore()
    self.finished_reservations_store = Queue.Queue()

    self._initialize_managers()

    #
    # The system administrator must define what scheduling system is used by each resource type
    # For instance:
    #
    # scheduling_systems = {
    #     "pld boards"     : ("PRIORITY_QUEUE", {}),
    #     "fpga boards"    : ("PRIORITY_QUEUE", {}),
    #     "vm experiments" : ("BOOKING", { 'slots' : 30 * 1000 }), # Slots of 30 minutes
    #     "something else" : ("EXTERNAL", { 'address' : 'http://192.168.1.50:8080/SchedulingServer', 'protocol' : 'SOAP' }) # If somebody else has implemented the scheduling schema in other language
    # }
    #
    self.schedulers = {}
    scheduling_systems = cfg_manager.get_value(CORE_SCHEDULING_SYSTEMS)
    for resource_type_name in scheduling_systems:
        scheduling_system, arguments = scheduling_systems[resource_type_name]
        if scheduling_system not in self.SCHEDULING_SYSTEMS:
            raise CoordExc.UnregisteredSchedulingSystemError("Unregistered scheduling system: %r" % scheduling_system)
        SchedulingSystemClass = self.SCHEDULING_SYSTEMS[scheduling_system]

        generic_scheduler_arguments = Scheduler.GenericSchedulerArguments(
            cfg_manager = self.cfg_manager,
            resource_type_name = resource_type_name,
            reservations_manager = self.reservations_manager,
            resources_manager = self.resources_manager,
            confirmer = self.confirmer,
            data_manager = self._data_manager,
            time_provider = self.time_provider,
            core_server_url = self.core_server_url,
            initial_store = self.initial_store,
            finished_store = self.finished_store,
            completed_store = self.completed_store,
            post_reservation_data_manager = self.post_reservation_data_manager
        )

        self.schedulers[resource_type_name] = SchedulingSystemClass(generic_scheduler_arguments, **arguments)

    self.aggregators = {
        # experiment_id_str : AGGREGATOR( schedulers )
    }

    coordination_configuration_parser = CoordinationConfigurationParser.CoordinationConfigurationParser(cfg_manager)
    resource_types_per_experiment_id = coordination_configuration_parser.parse_resources_for_experiment_ids()

    #
    # This configuration argument has a dictionary such as:
    # {
    #     'experiment_id_str' : {'foo' : 'bar'}
    # }
    #
    # The argument itself is not mandatory.
    #
    aggregators_configuration = self.cfg_manager.get_value(CORE_SCHEDULER_AGGREGATORS, {})

    for experiment_id_str in resource_types_per_experiment_id:
        # Aggregators are not bound to a single resource type, hence
        # resource_type_name = None.
        generic_scheduler_arguments = Scheduler.GenericSchedulerArguments(
            cfg_manager = self.cfg_manager,
            resource_type_name = None,
            reservations_manager = self.reservations_manager,
            resources_manager = self.resources_manager,
            confirmer = self.confirmer,
            data_manager = self._data_manager,
            time_provider = self.time_provider,
            core_server_url = self.core_server_url,
            initial_store = self.initial_store,
            finished_store = self.finished_store,
            completed_store = self.completed_store,
            post_reservation_data_manager = self.post_reservation_data_manager
        )

        resource_type_names = resource_types_per_experiment_id[experiment_id_str]
        try:
            aggregated_schedulers = {}
            for resource_type_name in resource_type_names:
                aggregated_schedulers[resource_type_name] = self.schedulers[resource_type_name]
        except KeyError as ke:
            # A resource type used by this experiment has no scheduler configured
            raise Exception("Scheduler not found with resource type name %s. Check %s config property." % (ke, CORE_SCHEDULING_SYSTEMS))

        particular_configuration = aggregators_configuration.get(experiment_id_str)

        aggregator = self.AGGREGATOR(generic_scheduler_arguments, ExperimentId.parse(experiment_id_str), aggregated_schedulers, particular_configuration)

        self.aggregators[experiment_id_str] = aggregator
def reserve_experiment(self, reservation_id, experiment_id, time, priority, initialization_in_accounting, client_initial_data, request_info):
    """Forward a reservation to the external server and record the link in Redis.

    Logs in on the external server, requests the (possibly remapped)
    experiment there and, when a reservation is granted, queues two hash
    entries keyed by the local ``reservation_id``: the remote reservation
    with its pickled cookies and start time, and the data kept for the
    later retrieval of pending results.

    Returns None when the external reservation is null; otherwise the
    value of ``self._convert_reservation_to_status`` for it.
    """
    # This core server is appended to the chain of servers already traversed
    traversed_servers = list(request_info.get(SERVER_UUIDS, []))
    traversed_servers.append((self.core_server_uuid, self.core_server_uuid_human))

    consumer_data = {
        'time_allowed': time,
        'priority': priority,
        'initialization_in_accounting': initialization_in_accounting,
        'external_user': request_info.get('username', ''),
        SERVER_UUIDS: traversed_servers,
    }
    # Copy through only the forwarded keys that the request actually carries
    consumer_data.update(
        (key, request_info[key]) for key in FORWARDED_KEYS if key in request_info
    )

    # TODO: identifier of the server
    login_client = self._create_login_client()
    session_id = login_client.login(self.username, self.password)
    client = self._create_client(login_client.get_cookies())

    initial_data_json = json.dumps(client_initial_data)
    consumer_data_json = json.dumps(consumer_data)

    # The administrator may have mapped this experiment id to a different
    # one; otherwise the very same id is requested.
    local_experiment_str = experiment_id.to_weblab_str()
    remote_experiment_str = self.experiments_map.get(local_experiment_str, local_experiment_str)
    remote_experiment_id = ExperimentId.parse(remote_experiment_str)

    external_reservation = client.reserve_experiment(
        session_id, remote_experiment_id, initial_data_json, consumer_data_json)
    if external_reservation.is_null():
        return None

    remote_reservation_id = external_reservation.reservation_id.id
    link_message = "Local reservation_id %s is linked to remote reservation %s" % (reservation_id, remote_reservation_id)
    log.log(ExternalWebLabDeustoScheduler, log.level.Warning, link_message)

    pickled_cookies = pickle.dumps(client.get_cookies())

    pipeline = self.redis_maker().pipeline()

    reservation_entry = {
        'remote_reservation_id': remote_reservation_id,
        'cookies': pickled_cookies,
        'start_time': time_mod.time(),
    }
    pipeline.hset(self.external_weblabdeusto_reservations, reservation_id, json.dumps(reservation_entry))

    pending_results_key = self.EXTERNAL_WEBLABDEUSTO_PENDING_RESULTS % (
        self.resource_type_name, self.core_server_route)
    pending_entry = {
        'remote_reservation_id': remote_reservation_id,
        'username': request_info.get('username', ''),
        'serialized_request_info': pickle.dumps(request_info),
        'experiment_id_str': experiment_id.to_weblab_str(),
    }
    pipeline.hset(pending_results_key, reservation_id, json.dumps(pending_entry))

    # Both entries are sent together
    pipeline.execute()

    return self._convert_reservation_to_status(
        external_reservation, reservation_id, remote_reservation_id)
def get_experiment_id(self, reservation_id):
    """Return the ExperimentId parsed from the data stored for a reservation.

    Raises CoordExc.ExpiredSessionError when ``get_reservation_data``
    returns None for ``reservation_id``.
    """
    data = self.get_reservation_data(reservation_id)
    if data is not None:
        return ExperimentId.parse(data[EXPERIMENT_TYPE])

    message = "Expired reservation: no experiment id found for that reservation (%s)" % reservation_id
    raise CoordExc.ExpiredSessionError(message)
def _process(self):
    """Collect the results of pending external reservations that are no longer alive.

    Loads the pending-results rows for this resource type and server
    route, asks the remote server for the uses of those reservations and,
    for each one that is not alive anymore, deletes its row in a fresh
    session. If the use is finished it is put in ``self.completed_store``
    together with a callback that deletes the post-reservation data of
    that reservation; otherwise the cancellation is only logged.

    Returns None. If the client cannot be created because of a
    ``urllib2.URLError``, nothing is processed in this run.
    """
    session = self.session_maker()
    try:
        # Convert to DTOs so the results remain usable after the session is closed
        pending_results = [ pending_result.to_dto() for pending_result in session.query(ExternalWebLabDeustoReservationPendingResults).filter_by(resource_type_name = self.resource_type_name, server_route = self.server_route).all() ]
    finally:
        session.close()

    if not pending_results:
        return

    try:
        session_id, client = self.create_client_func(None)
    except urllib2.URLError:
        # Remote server is down, try later
        return

    remote_reservation_ids = [ SessionId(pending_result.remote_reservation_id) for pending_result in pending_results ]

    results = client.get_experiment_uses_by_id(session_id, remote_reservation_ids)

    for pending_result, result in zip(pending_results, results):
        if result.is_alive():
            continue

        username = pending_result.username
        try:
            # NOTE(review): pickle.loads is only safe while the store this
            # value is read back from is trusted.
            request_info = pickle.loads(pending_result.serialized_request_info.encode('utf-8'))
        except Exception as e:
            log.log(ResultsRetriever, log.level.Critical, "Probably serialized_request_info was truncated in %s" % pending_result)
            log.log_exc(ResultsRetriever, log.level.Error)
            request_info = {'error' : 'could not be stored: %s' % e}

        reservation_id = pending_result.reservation_id
        remote_reservation_id = pending_result.remote_reservation_id

        # Whoever manages to delete the row is the one that stores the result
        session = self.session_maker()
        try:
            db_pending_result = session.query(ExternalWebLabDeustoReservationPendingResults).filter_by(id = pending_result.id).first()
            if db_pending_result is not None:
                session.delete(db_pending_result)
                session.commit()
            else:
                log.log(ResultsRetriever, log.level.Info, "Pending reservation %r not found. Assuming it is managed by other thread" % pending_result)
                continue
        except (IntegrityError, ConcurrentModificationError, StaleDataError):
            log.log(ResultsRetriever, log.level.Info, "Pending reservation %r deletion failed. Assuming it is managed by other thread" % pending_result)
            log.log_exc(ResultsRetriever, log.level.Debug)
            # Somebody else is already handling this
            continue
        finally:
            session.close()

        if result.is_finished():
            use = result.experiment_use
            use.experiment_id = ExperimentId.parse(pending_result.experiment_id_str)
            use.remote_reservation_id = remote_reservation_id
            use.reservation_id = reservation_id

            # Keep only plain string / numeric values; iterate over a copy
            # of the keys since entries are removed while looping.
            for key in list(request_info):
                if not isinstance(request_info[key], (basestring, numbers.Number)):
                    request_info.pop(key)
            use.request_info = request_info

            # Bind the current reservation_id as a default argument: a plain
            # closure would look the name up when the callback is finally
            # invoked, by which time the loop may have moved on to another
            # reservation.
            callback = lambda reservation_id=reservation_id: self.post_reservation_data_manager.delete(reservation_id)
            self.completed_store.put(username, use, callback)
        else:
            log.log(ResultsRetriever, log.level.Info, "Reservation id %s was cancelled and therefore not stored" % reservation_id)
def _initial_clean(self, coordination_configuration_parser):
    """Register every configured external server with each of its resource types.

    The parser's ``parse_external_servers()`` result is walked key by key;
    each key is parsed as an ExperimentId and handed, together with each
    resource type name listed under it, to
    ``self.resources_manager.add_experiment_id``.
    """
    servers = coordination_configuration_parser.parse_external_servers()
    for server_str in servers:
        for resource_type in servers[server_str]:
            external_experiment_id = ExperimentId.parse(server_str)
            self.resources_manager.add_experiment_id(external_experiment_id, resource_type)
def _update_queues(self):
    """Promote waiting reservations of this resource type onto free instances.

    Each pass of the outer loop takes the first waiting reservation that has
    not been tried yet in this call and walks the free resource instances
    looking for one that is working, can be acquired and serves the requested
    experiment. On success the reservation data is updated in Redis and the
    confirmation is enqueued in ``self.confirmer``.

    Returns None, either when no untried waiting reservation is left or when
    there is no free instance at all.
    """
    ###########################################################
    # A waiting reservation may be impossible to promote while
    # the one behind it can be: a user may be waiting for
    # "pld boards", but only for those running a
    # "ud-binary@Binary experiments" server. If only a
    # "ud-pld@PLD Experiments" instance is available, that user
    # stays in the queue while another one waiting for
    # "ud-pld@PLD Experiments" is promoted.
    #
    # So we remember the reservations already tried during this
    # call and skip them; they get another chance in the next
    # run of _update_queues.
    #
    tried_reservation_ids = []

    pqueue_map_key = WEBLAB_RESOURCE_PQUEUE_MAP % self.resource_type_name
    pqueue_sorted_key = WEBLAB_RESOURCE_PQUEUE_SORTED % self.resource_type_name
    slots_key = WEBLAB_RESOURCE_SLOTS % self.resource_type_name

    ###########################################################
    # While there are free instances and waiting reservations,
    # promote the first waiting reservation. This is done one
    # reservation at a time so each change is committed on its own.
    #
    while True:
        client = self.redis_maker()

        # Ask for one more entry than the number already tried, so at least
        # one fresh candidate shows up if there is any.
        candidate_ids = client.zrangebyscore(pqueue_sorted_key, -10000, +10000,
                                             start=0, num=len(tried_reservation_ids) + 1)

        for filled_id in candidate_ids:
            # The reservation id is whatever follows the first underscore
            reservation_id = filled_id[filled_id.find('_') + 1:]
            if reservation_id not in tried_reservation_ids:
                break
        else:
            # There is no waiting reservation for this resource that we haven't already tried
            return

        tried_reservation_ids.append(reservation_id)

        #
        # Free instances of the current resource type are the members of
        # the slots set.
        #
        free_instances = [
            Resource(self.resource_type_name, instance_name)
            for instance_name in client.smembers(slots_key)
        ]
        if not free_instances:
            # If there is no free instance, just return
            return

        #
        # Decide in which order the free instances are offered to this
        # reservation.
        #
        if self.randomize_instances:
            ordered_instances = list(free_instances)
            random.shuffle(ordered_instances)
        else:
            ordered_instances = sorted(
                free_instances,
                key=lambda resource: (resource.resource_type, resource.resource_instance))

        for free_instance in ordered_instances:
            #
            # IMPORTANT: from here on every "continue" should first revoke the
            # reservations_manager and resources_manager confirmations
            #
            if not self.resources_manager.check_working(free_instance):
                # The instance is not working
                continue

            if not self.reservations_manager.confirm(reservation_id):
                # The student has already been confirmed somewhere else, so
                # don't try other instances; move on to another student.
                break

            if not self.resources_manager.acquire_resource(free_instance):
                # The instance has been acquired by someone else: unconfirm
                # the student and try again with another free instance.
                self.reservations_manager.downgrade_confirmation(reservation_id)
                continue

            instance_reservations_key = WEBLAB_RESOURCE_PQUEUE_INSTANCE_RESERVATIONS % (
                self.resource_type_name, free_instance.resource_instance)
            client.sadd(instance_reservations_key, reservation_id)

            reservation_key = WEBLAB_RESOURCE_RESERVATION_PQUEUE % (
                self.resource_type_name, reservation_id)
            raw_pqueue_data = client.get(reservation_key)
            reservation_data = self.reservations_manager.get_reservation_data(reservation_id)
            if raw_pqueue_data is None or reservation_data is None:
                # The student is not here anymore; downgrading the confirmation
                # is not required but releasing the resource is. Skip the rest
                # of the free instances.
                self.resources_manager.release_resource(free_instance)
                client.srem(instance_reservations_key, reservation_id)
                break

            pqueue_data = json.loads(raw_pqueue_data)

            start_time = self.time_provider.get_time()
            total_time = pqueue_data[TIME]
            pqueue_data[START_TIME] = start_time
            pqueue_data[TIMESTAMP_BEFORE] = start_time
            pqueue_data[ACTIVE_STATUS] = STATUS_WAITING_CONFIRMATION
            pqueue_data[RESOURCE_INSTANCE] = free_instance.to_weblab_str()
            initialization_in_accounting = pqueue_data[INITIALIZATION_IN_ACCOUNTING]

            client_initial_data = reservation_data[CLIENT_INITIAL_DATA]
            request_info = json.loads(reservation_data[REQUEST_INFO])
            username = request_info.get('username')
            locale = request_info.get('locale')

            requested_experiment_type = ExperimentId.parse(reservation_data[EXPERIMENT_TYPE])

            # Every experiment instance of the resource is checked; when more
            # than one matches, the last one is the one selected.
            matching_instances = [
                experiment_instance
                for experiment_instance in self.resources_manager.list_experiment_instance_ids_by_resource(free_instance)
                if experiment_instance.to_experiment_id() == requested_experiment_type
            ]
            if matching_instances:
                selected_experiment_instance = matching_instances[-1]
            else:
                selected_experiment_instance = None

            if selected_experiment_instance is None:
                # This resource is not valid for this user, so another free
                # instance must be selected. Clean what was acquired first.
                self.reservations_manager.downgrade_confirmation(reservation_id)
                self.resources_manager.release_resource(free_instance)
                client.srem(instance_reservations_key, reservation_id)
                continue

            pqueue_data[EXPERIMENT_INSTANCE] = selected_experiment_instance.to_weblab_str()
            laboratory_coord_address = self.resources_manager.get_laboratory_coordaddress_by_experiment_instance_id(
                selected_experiment_instance)
            pqueue_data[LAB_COORD] = laboratory_coord_address

            client.set(reservation_key, json.dumps(pqueue_data))

            # The reservation is not waiting anymore: drop it from the sorted queue
            filled_reservation_id = client.hget(pqueue_map_key, reservation_id)
            client.zrem(pqueue_sorted_key, filled_reservation_id)

            #
            # The confirmation is enqueued rather than performed here, since
            # it might take a long time (for instance, if the laboratory
            # server does not reply because of a network problem). These
            # petitions are run in other threads.
            #
            deserialized_server_initial_data = {
                'priority.queue.slot.length' : '%s' % total_time,
                'priority.queue.slot.start' : '%s' % datetime.datetime.fromtimestamp(start_time),
                'priority.queue.slot.initialization_in_accounting' : initialization_in_accounting,
                'request.experiment_id.experiment_name' : selected_experiment_instance.exp_name,
                'request.experiment_id.category_name' : selected_experiment_instance.cat_name,
                'request.username' : username,
                'request.full_name' : username,
                'request.locale' : locale,
                # TODO: add the username and user full name here
            }
            server_initial_data = json.dumps(deserialized_server_initial_data)
            # server_initial_data will contain information such as "what was the last experiment used?".
            # If a single resource was used by a binary experiment, then the next time may not require reprogramming the device
            self.confirmer.enqueue_confirmation(
                laboratory_coord_address, reservation_id, selected_experiment_instance,
                client_initial_data, server_initial_data, self.resource_type_name)
            #
            # Done with this reservation: leave the instances loop and stay in
            # the while True in order to handle the next reservation.
            #
            break
def __init__(self, data_manager, locator, cfg_manager, ConfirmerClass):
    """Build the coordinator: stores, managers, schedulers and aggregators.

    Parameters:
        data_manager: kept as ``self._data_manager`` and handed to every
            scheduler and aggregator.
        locator: kept as ``self.locator`` and passed to the confirmer.
        cfg_manager: configuration source; read through ``get_value``.
        ConfirmerClass: class used to build ``self.confirmer``; when None,
            ``Confirmer.ReservationConfirmer`` is used.

    Raises:
        CoordExc.UnregisteredSchedulingSystemError: a resource type is
            configured with a scheduling system missing from
            ``self.SCHEDULING_SYSTEMS``.
        Exception: an experiment id refers to a resource type name that has
            no scheduler configured.
    """
    if ConfirmerClass is None:
        ConfirmerClass = Confirmer.ReservationConfirmer

    self.cfg_manager = cfg_manager
    self._data_manager = data_manager

    self.core_server_url = self.cfg_manager.get_value(CORE_SERVER_URL)

    self.notifier = AdminNotifier.AdminNotifier(self.cfg_manager)
    self.notifications_enabled = self.cfg_manager.get_value(
        RESOURCES_CHECKER_NOTIFICATIONS_ENABLED,
        DEFAULT_RESOURCES_CHECKER_NOTIFICATIONS_ENABLED)

    self.locator = locator  # Used by ResourcesChecker
    self.confirmer = ConfirmerClass(self, locator)

    self.time_provider = self.CoordinatorTimeProvider()

    self.initial_store = TemporalInformationStore.InitialTemporalInformationStore()
    self.finished_store = TemporalInformationStore.FinishTemporalInformationStore()
    self.completed_store = TemporalInformationStore.CompletedInformationStore()
    self.finished_reservations_store = Queue.Queue()

    self._initialize_managers()

    def build_scheduler_arguments(resource_type_name):
        # Schedulers and aggregators receive the same collaborators; only the
        # resource type name differs (None for aggregators).
        return Scheduler.GenericSchedulerArguments(
            cfg_manager=self.cfg_manager,
            resource_type_name=resource_type_name,
            reservations_manager=self.reservations_manager,
            resources_manager=self.resources_manager,
            confirmer=self.confirmer,
            data_manager=self._data_manager,
            time_provider=self.time_provider,
            core_server_url=self.core_server_url,
            initial_store=self.initial_store,
            finished_store=self.finished_store,
            completed_store=self.completed_store,
            post_reservation_data_manager=self.post_reservation_data_manager)

    #
    # The system administrator must define what scheduling system is used by each resource type
    # For instance:
    #
    # scheduling_systems = {
    #     "pld boards"     : ("PRIORITY_QUEUE", {}),
    #     "fpga boards"    : ("PRIORITY_QUEUE", {}),
    #     "vm experiments" : ("BOOKING", { 'slots' : 30 * 1000 }), # Slots of 30 minutes
    #     "something else" : ("EXTERNAL", { 'address' : 'http://192.168.1.50:8080/SchedulingServer', 'protocol' : 'SOAP' }) # If somebody else has implemented the scheduling schema in other language
    # }
    #
    self.schedulers = {}
    scheduling_systems = cfg_manager.get_value(CORE_SCHEDULING_SYSTEMS)
    for resource_type_name in scheduling_systems:
        scheduling_system, arguments = scheduling_systems[resource_type_name]
        if scheduling_system not in self.SCHEDULING_SYSTEMS:
            raise CoordExc.UnregisteredSchedulingSystemError(
                "Unregistered scheduling system: %r" % scheduling_system)
        SchedulingSystemClass = self.SCHEDULING_SYSTEMS[scheduling_system]

        generic_scheduler_arguments = build_scheduler_arguments(resource_type_name)
        self.schedulers[resource_type_name] = SchedulingSystemClass(
            generic_scheduler_arguments, **arguments)

    self.aggregators = {
        # experiment_id_str : AGGREGATOR( schedulers )
    }

    coordination_configuration_parser = CoordinationConfigurationParser.CoordinationConfigurationParser(cfg_manager)
    resource_types_per_experiment_id = coordination_configuration_parser.parse_resources_for_experiment_ids()

    #
    # This configuration argument has a dictionary such as:
    # {
    #     'experiment_id_str' : {'foo' : 'bar'}
    # }
    #
    # The argument itself is not mandatory.
    #
    aggregators_configuration = self.cfg_manager.get_value(CORE_SCHEDULER_AGGREGATORS, {})

    for experiment_id_str in resource_types_per_experiment_id:
        generic_scheduler_arguments = build_scheduler_arguments(None)

        resource_type_names = resource_types_per_experiment_id[experiment_id_str]
        try:
            # Insertion order of the resource type names is kept
            aggregated_schedulers = OrderedDict()
            for resource_type_name in resource_type_names:
                aggregated_schedulers[resource_type_name] = self.schedulers[resource_type_name]
        except KeyError as ke:
            raise Exception(
                "Scheduler not found with resource type name %s. Check %s config property."
                % (ke, CORE_SCHEDULING_SYSTEMS))

        # None when no particular configuration exists for this experiment
        particular_configuration = aggregators_configuration.get(experiment_id_str)

        aggregator = self.AGGREGATOR(generic_scheduler_arguments,
                                     ExperimentId.parse(experiment_id_str),
                                     aggregated_schedulers,
                                     particular_configuration)
        self.aggregators[experiment_id_str] = aggregator
def _process(self):
    """Retrieve the results of the external reservations pending in Redis.

    Reads every entry of the ``self.external_weblabdeusto_pending`` hash,
    asks the remote server for the matching experiment uses and, for each
    one that is no longer alive, removes the hash entry and (when the use is
    finished) puts it into ``self.completed_store``.

    Returns None. When the client cannot be created because of a
    ``urllib2.URLError`` nothing else is done, so the entries are kept.
    """
    redis_client = self.redis_maker()

    pending_results = []
    for reservation_id in redis_client.hkeys(self.external_weblabdeusto_pending):
        pending_result_str = redis_client.hget(self.external_weblabdeusto_pending, reservation_id)
        # The entry may have been removed between hkeys and hget
        if pending_result_str is not None:
            pending_result = json.loads(pending_result_str)
            pending_result['reservation_id'] = reservation_id
            pending_results.append(pending_result)

    if not pending_results:
        return

    try:
        session_id, client = self.create_client_func(None)
    except urllib2.URLError:
        # Remote server is down, try later
        return

    remote_reservation_ids = [
        SessionId(pending_result['remote_reservation_id'])
        for pending_result in pending_results
    ]
    results = client.get_experiment_uses_by_id(session_id, remote_reservation_ids)

    for pending_result, result in zip(pending_results, results):
        if result.is_alive():
            continue

        username = pending_result['username']
        try:
            # NOTE(review): pickle.loads can execute arbitrary code if the stored
            # payload is tampered with; confirm only trusted code writes this hash.
            request_info = pickle.loads(pending_result['serialized_request_info'].encode('utf-8'))
        except Exception as e:
            log.log(ResultsRetriever, log.level.Critical,
                    "Probably serialized_request_info was truncated in %s" % pending_result)
            log.log_exc(ResultsRetriever, log.level.Error)
            request_info = {'error': 'could not be stored: %s' % e}

        reservation_id = pending_result['reservation_id']
        remote_reservation_id = pending_result['remote_reservation_id']

        # Only the caller whose hdel actually removes the entry stores the result
        if not redis_client.hdel(self.external_weblabdeusto_pending, reservation_id):
            log.log(ResultsRetriever, log.level.Info,
                    "Pending reservation %r not found. Assuming it is managed by other thread" % pending_result)
            continue

        if not result.is_finished():
            log.log(ResultsRetriever, log.level.Info,
                    "Reservation id %s was cancelled and therefore not stored" % reservation_id)
            continue

        use = result.experiment_use
        use.experiment_id = ExperimentId.parse(pending_result['experiment_id_str'])
        use.remote_reservation_id = remote_reservation_id
        use.reservation_id = reservation_id

        # Keep only plain string / numeric values; iterate over a copy of the
        # keys because entries are removed while walking them.
        for key in list(request_info):
            if not isinstance(request_info[key], (basestring, numbers.Number)):
                request_info.pop(key)
        use.request_info = request_info

        # Bind the current reservation id as a default argument. A plain closure
        # looks the name up when it is called, so a callback run after the loop
        # has advanced would delete the data of a different reservation.
        callback = lambda reservation_id=reservation_id: self.post_reservation_data_manager.delete(reservation_id)
        self.completed_store.put(username, use, callback)
def _process(self):
    """Retrieve the results of the external reservations that are still pending.

    Loads every pending result stored for ``self.resource_type_name`` and
    ``self.server_route``, asks the remote server for the matching experiment
    uses and, for each one that is no longer alive, deletes the pending row
    and (when the use is finished) puts it into ``self.completed_store``.

    Returns None. When the client cannot be created because of a
    ``urllib2.URLError`` nothing else is done, so the pending rows are kept.
    """
    session = self.session_maker()
    try:
        pending_query = session.query(
            ExternalWebLabDeustoReservationPendingResults).filter_by(
                resource_type_name=self.resource_type_name,
                server_route=self.server_route)
        pending_results = [
            pending_result.to_dto() for pending_result in pending_query.all()
        ]
    finally:
        session.close()

    if not pending_results:
        return

    try:
        session_id, client = self.create_client_func(None)
    except urllib2.URLError:
        # Remote server is down, try later
        return

    remote_reservation_ids = [
        SessionId(pending_result.remote_reservation_id)
        for pending_result in pending_results
    ]
    results = client.get_experiment_uses_by_id(session_id,
                                               remote_reservation_ids)

    for pending_result, result in zip(pending_results, results):
        if result.is_alive():
            continue

        username = pending_result.username
        try:
            # NOTE(review): pickle.loads can execute arbitrary code if the
            # stored payload is tampered with; confirm only trusted code
            # writes this column.
            request_info = pickle.loads(
                pending_result.serialized_request_info.encode('utf-8'))
        except Exception as e:
            log.log(
                ResultsRetriever, log.level.Critical,
                "Probably serialized_request_info was truncated in %s" %
                pending_result)
            log.log_exc(ResultsRetriever, log.level.Error)
            request_info = {'error': 'could not be stored: %s' % e}

        reservation_id = pending_result.reservation_id
        remote_reservation_id = pending_result.remote_reservation_id

        # Delete the pending row first: only the caller that manages to
        # delete it goes on to store the result.
        session = self.session_maker()
        try:
            db_pending_result = session.query(
                ExternalWebLabDeustoReservationPendingResults
            ).filter_by(id=pending_result.id).first()
            if db_pending_result is None:
                log.log(
                    ResultsRetriever, log.level.Info,
                    "Pending reservation %r not found. Assuming it is managed by other thread"
                    % pending_result)
                continue
            session.delete(db_pending_result)
            session.commit()
        except (IntegrityError, ConcurrentModificationError,
                StaleDataError):
            log.log(
                ResultsRetriever, log.level.Info,
                "Pending reservation %r deletion failed. Assuming it is managed by other thread"
                % pending_result)
            log.log_exc(ResultsRetriever, log.level.Debug)
            # Somebody else is already handling this
            continue
        finally:
            session.close()

        if not result.is_finished():
            log.log(
                ResultsRetriever, log.level.Info,
                "Reservation id %s was cancelled and therefore not stored"
                % reservation_id)
            continue

        use = result.experiment_use
        use.experiment_id = ExperimentId.parse(
            pending_result.experiment_id_str)
        use.remote_reservation_id = remote_reservation_id
        use.reservation_id = reservation_id

        # Keep only plain string / numeric values; iterate over a copy of
        # the keys because entries are removed while walking them.
        for key in list(request_info):
            if not isinstance(request_info[key],
                              (basestring, numbers.Number)):
                request_info.pop(key)
        use.request_info = request_info

        # Bind the current reservation id as a default argument. A plain
        # closure looks the name up when it is called, so a callback run
        # after the loop has advanced would delete the data of a different
        # reservation.
        callback = lambda reservation_id=reservation_id: self.post_reservation_data_manager.delete(
            reservation_id)
        self.completed_store.put(username, use, callback)
def list_experiments(self):
    """Return the parsed members of the WEBLAB_EXPERIMENT_TYPES Redis set.

    Each member is converted with ``ExperimentId.parse``; the result is a
    list in whatever order ``smembers`` yields the members.
    """
    redis_client = self._redis_maker()
    experiment_ids = []
    for experiment_type in redis_client.smembers(WEBLAB_EXPERIMENT_TYPES):
        experiment_ids.append(ExperimentId.parse(experiment_type))
    return experiment_ids