def create(reservation: Dict[str, Any]) -> Tuple[Content, HttpStatusCode]:
    """Create a reservation from an API payload.

    :param reservation: request body; the keys ``title``, ``description``,
        ``resourceId``, ``userId``, ``start`` and ``end`` are read.
    :returns: ``(content, status)`` where status is
        201 when the reservation has been saved,
        403 when the caller is neither admin nor the reservation owner, or
        ``ReservationVerifier`` rejects the reservation,
        422 when an ``AssertionError`` is raised while building/saving,
        500 for any other ``Exception`` (including a missing payload key).
    """
    try:
        new_reservation = Reservation(title=reservation['title'],
                                      description=reservation['description'],
                                      resource_id=reservation['resourceId'],
                                      user_id=reservation['userId'],
                                      start=reservation['start'],
                                      end=reservation['end'])

        user = User.get(get_jwt_identity())
        if (is_admin() or __is_reservation_owner(new_reservation)) \
                and ReservationVerifier.is_reservation_allowed(user, new_reservation):
            new_reservation.save()
            content = {
                'msg': RESERVATION['create']['success'],
                'reservation': new_reservation.as_dict()
            }
            status = 201
        else:
            content = {'msg': RESERVATION['create']['failure']['forbidden']}
            status = 403
    except AssertionError as e:
        content = {
            'msg': RESERVATION['create']['failure']['invalid'].format(reason=e)
        }
        status = 422
    except Exception as e:
        print(e)
        content = {'msg': GENERAL['internal_error'] + str(e)}
        status = 500

    # Deliberately NOT inside a `finally:` block: returning from `finally`
    # discards any in-flight BaseException (KeyboardInterrupt, SystemExit, ...)
    # and replaces it with an UnboundLocalError on `content`.
    return content, status
def test_delete_active_reservation(tables, client, active_reservation,
                                   permissive_restriction):
    """DELETE on a saved `active_reservation` answers 200 and removes the record."""
    permissive_restriction.save()
    active_reservation.save()

    url = ENDPOINT + '/' + str(active_reservation.id)
    response = client.delete(url, headers=HEADERS)

    assert response.status_code == HTTPStatus.OK
    # The record must no longer be retrievable
    with pytest.raises(NoResultFound):
        Reservation.get(active_reservation.id)
def test_current_events_will_only_return_non_cancelled_reservations(
        tables, new_reservation, new_reservation_2):
    """A cancelled reservation must disappear from Reservation.current_events()."""
    # Make the first reservation span the current moment
    new_reservation.start = datetime.utcnow() - timedelta(minutes=10)
    new_reservation.end = datetime.utcnow() + timedelta(minutes=60)
    new_reservation.save()
    events_before_cancel = Reservation.current_events()
    assert new_reservation in events_before_cancel

    # Cancel it and add a second, non-cancelled reservation
    new_reservation.is_cancelled = True
    new_reservation.save()
    new_reservation_2.save()

    events_after_cancel = Reservation.current_events()
    assert new_reservation not in events_after_cancel
    assert new_reservation_2 in events_after_cancel
def create(reservation: Dict[str, Any]) -> Tuple[Content, HttpStatusCode]:
    """Create a reservation from an API payload.

    Non-admin callers may only reserve in their own name and may not reserve
    a slot whose start lies more than one minute in the past.

    :param reservation: request body; the keys ``title``, ``description``,
        ``resourceId``, ``userId``, ``start`` and ``end`` are read.
    :returns: ``(content, status)`` where status is
        201 when the reservation has been saved,
        403 when a ``ForbiddenException`` is raised by one of the checks,
        422 when an ``AssertionError`` is raised,
        500 for any other ``Exception``.
    """
    try:
        new_reservation = Reservation(title=reservation['title'],
                                      description=reservation['description'],
                                      resource_id=reservation['resourceId'],
                                      user_id=reservation['userId'],
                                      start=reservation['start'],
                                      end=reservation['end'])

        if not is_admin() and not __is_reservation_owner(new_reservation):
            raise ForbiddenException(
                "Cannot reserve resources in another user's name")

        # Tolerate requests whose start is up to one minute old
        reservation_start = DateUtils.try_parse_string(new_reservation.start)
        request_time_limit = timedelta(minutes=1)
        starts_in_the_future = (reservation_start +
                                request_time_limit) >= datetime.utcnow()
        if not is_admin() and not starts_in_the_future:
            raise ForbiddenException("Cannot reserve resources in the past")

        user = User.get(get_jwt_identity())
        if not ReservationVerifier.is_reservation_allowed(
                user, new_reservation):
            raise ForbiddenException("Reservation not allowed")

        new_reservation.save()
        content = {
            'msg': RESERVATION['create']['success'],
            'reservation': new_reservation.as_dict()
        }
        status = 201
    except ForbiddenException as e:
        content = {
            'msg':
            RESERVATION['create']['failure']['forbidden'].format(reason=e)
        }
        status = 403
    except AssertionError as e:
        content = {
            'msg': RESERVATION['create']['failure']['invalid'].format(reason=e)
        }
        status = 422
    except Exception as e:
        print(e)
        content = {'msg': GENERAL['internal_error'] + str(e)}
        status = 500

    # Returned after (not inside `finally:`) the try statement, so that
    # BaseExceptions such as KeyboardInterrupt propagate instead of being
    # silently replaced by an UnboundLocalError.
    return content, status
def check_current_gpu_slots(
        self, hosts_with_gpu_occupation: Dict[str, Dict[str, bool]]
) -> Dict[str, Dict[str, int]]:
    '''For each GPU in the dictionary, return the number of minutes until its
    next reservation.

    Return 0 for GPUs that are currently occupied, regardless of the reservations,
    and for GPUs whose nearest reservation has already started.
    Return None for GPUs that have no reservation within
    self.considered_future_period.

    NOTE: despite the declared return annotation, the values may be a float
    (fractional minutes) or None.

    :param hosts_with_gpu_occupation: {hostname: {GPU_id: True if GPU occupied}}
    :returns: {hostname: {GPU_id: number_of_minutes until next occupation of the GPU}}
    '''
    ret = {}  # type: Dict[str, Dict[str, int]]
    for host, gpus in hosts_with_gpu_occupation.items():
        ret[host] = {}
        for gpu_id, occupied in gpus.items():
            if occupied:
                ret[host][gpu_id] = 0
                continue

            near_reservations = Reservation.upcoming_events_for_resource(
                gpu_id, self.considered_future_period)
            if not len(near_reservations):
                ret[host][gpu_id] = None  # type: ignore
                continue

            # Read the clock once: comparing against one reading and then
            # subtracting another could yield a slightly negative result.
            now = datetime.utcnow()
            nearest_start = near_reservations[0].start
            if nearest_start > now:  # type: ignore
                ret[host][gpu_id] = \
                    (nearest_start - now).total_seconds() / 60  # type: ignore
            else:
                ret[host][gpu_id] = 0
    return ret
def test_create_reservation_with_an_indefinite_restriction(
        tables, client, new_user, restriction):
    """A reservation inside a restriction that has no end date is accepted (201)."""
    resource_id = '0123456789012345678901234567890123456789'
    new_user.save()

    # Restriction without an end date, granted to the user
    restriction.starts_at = '2101-01-01T10:00:00.000Z'
    restriction.ends_at = None
    restriction.apply_to_user(new_user)

    # Resource covered by that restriction
    gpu = Resource(id=resource_id)
    gpu.save()
    restriction.apply_to_resource(gpu)

    payload = {
        'title': 'Test reservation',
        'description': 'Test reservation',
        'resourceId': resource_id,
        'userId': new_user.id,
        'start': '2101-01-02T10:00:00.000Z',
        'end': '2101-01-03T12:00:00.000Z'
    }
    response = client.post(ENDPOINT, headers=HEADERS, data=json.dumps(payload))
    body = json.loads(response.data.decode('utf-8'))

    assert response.status_code == HTTPStatus.CREATED
    assert Reservation.get(body['reservation']['id']) is not None
def get_selected(
        resources_ids: Optional[List[ResourceId]] = None,
        start: Optional[str] = None,
        end: Optional[str] = None
) -> Tuple[Union[List[Any], Content], HttpStatusCode]:
    """Return reservations of the given resources within a time range.

    :param resources_ids: resources to look up
    :param start: beginning of the range, parsed with DateUtils.parse_string
    :param end: end of the range, parsed with DateUtils.parse_string
    :returns: ``(list of reservation dicts, 200)`` on success;
        ``(message, 400)`` when any argument is missing/empty or a
        ValueError/AssertionError is raised; ``(message, 500)`` otherwise.
    """
    # TODO This may need a decent refactor - give more freedom
    # All args are required at once, otherwise return 400
    if not (resources_ids and start and end):
        return {'msg': GENERAL['bad_request']}, 400

    try:
        range_start = DateUtils.parse_string(start)
        range_end = DateUtils.parse_string(end)
        found = list(
            Reservation.filter_by_uuids_and_time_range(
                resources_ids, range_start, range_end))
        serialized = [item.as_dict() for item in found]
    except (ValueError, AssertionError) as reason:
        return {'msg': '{}. {}'.format(GENERAL['bad_request'], reason)}, 400
    except Exception as e:
        return {'msg': GENERAL['internal_error'] + str(e)}, 500

    return serialized, 200  # type: ignore
def delete(id):
    """Delete the reservation with the given id.

    Only the reservation's owner or a user holding the 'admin' role may
    delete it.

    :param id: identifier passed to ``Reservation.get``
    :returns: ``(content, status)`` - 200 on success, 403 when the caller is
        not privileged (or another AssertionError is raised), 404 when the
        reservation does not exist, 500 for any other ``Exception``.
    """
    try:
        current_user_id = get_jwt_identity()
        claims = get_jwt_claims()

        # Fetch the reservation
        reservation_to_destroy = Reservation.get(id)

        # Must be privileged
        is_admin = 'admin' in claims['roles']
        is_owner = reservation_to_destroy.user_id == current_user_id
        if not (is_owner or is_admin):
            # Raised explicitly instead of using `assert`: assert statements
            # are removed under `python -O`, which would disable this
            # authorization check entirely.
            raise AssertionError(G['unpriviliged'])

        # Destroy
        reservation_to_destroy.destroy()
    except AssertionError as error_message:
        content, status = {'msg': str(error_message)}, 403
    except NoResultFound:
        # FIXME It is theoretically possible that User.get() could also raise this exception
        content, status = {'msg': R['not_found']}, 404
    except Exception as e:
        content, status = {'msg': G['internal_error'] + str(e)}, 500
    else:
        content, status = {'msg': R['delete']['success']}, 200

    # Not returned from a `finally:` block, so BaseExceptions propagate
    # instead of being swallowed.
    return content, status
def new_reservation_2(new_user):
    """Build (without saving) a 60-minute reservation starting now for user_id=2."""
    starts = datetime.utcnow()
    ends = starts + timedelta(minutes=60)
    return Reservation(
        user_id=2,
        title='TEST TITLE',
        description='TEST_DESCRIPTION',
        protected_resource_id='0123456789012345678901234567890123456789',
        starts_at=starts,
        ends_at=ends,
    )
def active_reservation(new_user, resource1):
    """Build (without saving) a reservation that began 5 hours ago and lasts 10 hours.

    The user is saved first so that ``new_user.id`` is available.
    """
    new_user.save()
    began_at = datetime.datetime.utcnow() - timedelta(hours=5)
    finishes_at = began_at + timedelta(hours=10)
    return Reservation(
        user_id=new_user.id,
        title='TEST TITLE',
        description='TEST_DESCRIPTION',
        resource_id=resource1.id,
        start=began_at,
        end=finishes_at,
    )
def new_reservation(new_user, resource1):
    """Build (without saving) a 60-minute reservation starting now for `new_user`.

    The user is saved first so that ``new_user.id`` is available.
    """
    new_user.save()
    starts = datetime.datetime.utcnow()
    ends = starts + timedelta(minutes=60)
    return Reservation(
        user_id=new_user.id,
        title='TEST TITLE',
        description='TEST_DESCRIPTION',
        resource_id=resource1.id,
        start=starts,
        end=ends,
    )
def update(id: ReservationId,
           newValues: Dict[str, Any]) -> Tuple[Content, HttpStatusCode]:
    """Update selected fields of an existing reservation.

    Rules enforced below:
      * a reservation that has already ended can only be changed by an admin,
      * ``title``, ``description``, ``resourceId`` and ``end`` may always be
        changed; ``start`` only while the reservation has not started yet or
        when the caller is an admin,
      * the updated reservation must still pass the owner/admin check and
        ``ReservationVerifier.is_reservation_allowed``.
    A successful update also resets ``is_cancelled`` to False.

    :param id: identifier of the reservation to update
    :param newValues: mapping of API (camelCase) field names to new values
    :returns: ``(content, status)`` - 201 on success, 403 on a
        ``ForbiddenException``, 404 when the reservation does not exist,
        422 on an ``AssertionError``, 500 for any other ``Exception``.
    """
    new_values = newValues
    allowed_fields = {'title', 'description', 'resourceId', 'end'}
    try:
        reservation = Reservation.get(id)

        if reservation.end < datetime.utcnow() and not is_admin():
            raise ForbiddenException('reservation already finished')

        if reservation.start > datetime.utcnow() or is_admin():
            allowed_fields.add('start')

        if not set(new_values.keys()).issubset(allowed_fields):
            raise ForbiddenException('invalid field is present')

        for field_name, new_value in new_values.items():
            field_name = snakecase(field_name)
            if field_name is None or not hasattr(reservation, field_name):
                # Raised explicitly instead of using `assert`, which would be
                # stripped under `python -O` and let unknown attributes be set.
                raise AssertionError(
                    'reservation has no {} field'.format(field_name))
            setattr(reservation, field_name, new_value)

        user = User.get(get_jwt_identity())
        if not (is_admin() or __is_reservation_owner(reservation)) or not \
                ReservationVerifier.is_reservation_allowed(user, reservation):
            raise ForbiddenException("reservation not allowed")

        reservation.is_cancelled = False
        reservation.save()
        content, status = {
            'msg': RESERVATION['update']['success'],
            'reservation': reservation.as_dict()
        }, 201
    except ForbiddenException as fe:
        content, status = {
            'msg':
            RESERVATION['update']['failure']['forbidden'].format(reason=fe)
        }, 403
    except NoResultFound:
        content, status = {'msg': RESERVATION['not_found']}, 404
    except AssertionError as e:
        content, status = {
            'msg':
            RESERVATION['update']['failure']['assertions'].format(reason=e)
        }, 422
    except Exception as e:
        log.critical(e)
        content, status = {'msg': GENERAL['internal_error'] + str(e)}, 500

    # Not returned from a `finally:` block, so BaseExceptions propagate
    # instead of being swallowed.
    return content, status
def log_current_usage(self):
    '''Write/refresh one `<reservation id>.json` log file in self.log_dir for
    every reservation returned by Reservation.current_events().

    Errors for a single reservation are logged and do not stop the loop.
    '''
    active_reservations = Reservation.current_events()
    infrastructure = self.infrastructure_manager.infrastructure

    for reservation in active_reservations:
        log_file_path = self.log_dir / '{id}.json'.format(id=reservation.id)
        try:
            gpu_data = self.extract_specific_gpu_data(
                uuid=reservation.protected_resource_id,
                infrastructure=infrastructure)
            usage_log = Log(data=gpu_data)
            usage_log.save(out_path=log_file_path)
        except Exception as e:
            log.error(e)
def test_update_past_reservation(tables, client, past_reservation,
                                 permissive_restriction):
    """PUT with a new title on `past_reservation` answers 201 and persists the title."""
    permissive_restriction.save()
    past_reservation.save()

    expected_title = past_reservation.title + '111'
    url = ENDPOINT + '/' + str(past_reservation.id)
    response = client.put(url,
                          headers=HEADERS,
                          data=json.dumps({'title': expected_title}))
    body = json.loads(response.data.decode('utf-8'))

    assert response.status_code == HTTPStatus.CREATED
    assert body['reservation']['title'] == expected_title
    assert Reservation.get(past_reservation.id).title == expected_title
def get_selected(resources_ids: List, start: str, end: str):
    """Return reservations of the given resources within a time range.

    :param resources_ids: resources to look up
    :param start: beginning of the range, parsed with Reservation.parsed_input_datetime
    :param end: end of the range, parsed with Reservation.parsed_input_datetime
    :returns: ``(list of serialized reservations, 200)`` on success;
        ``(message, 400)`` when any argument is missing/empty or a
        ValueError/AssertionError is raised; ``(message, 500)`` otherwise.
    """
    # TODO This may need a decent refactor - give more freedom
    # All args are required at once, otherwise return 400
    if not (resources_ids and start and end):
        return {'msg': G['bad_request']}, 400

    try:
        range_start = Reservation.parsed_input_datetime(start)
        range_end = Reservation.parsed_input_datetime(end)
        found = list(Reservation.filter_by_uuids_and_time_range(
            resources_ids, range_start, range_end))
        serialized = [item.as_dict for item in found]
    except (ValueError, AssertionError) as reason:
        return {'msg': '{}. {}'.format(G['bad_request'], reason)}, 400
    except Exception:
        return {'msg': G['internal_error']}, 500

    return serialized, 200  # type: ignore
def create(reservation):
    """Create and save a reservation from an API payload.

    :param reservation: request body; the keys ``title``, ``description``,
        ``resourceId``, ``userId``, ``start`` and ``end`` are read.
    :returns: ``(content, status)`` - 201 with the serialized reservation on
        success, 422 when an ``AssertionError`` is raised while building or
        saving, 500 for any other ``Exception``.
    """
    try:
        new_reservation = Reservation(
            title=reservation['title'],
            description=reservation['description'],
            protected_resource_id=reservation['resourceId'],
            user_id=reservation['userId'],
            starts_at=reservation['start'],
            ends_at=reservation['end'])
        new_reservation.save()
    except AssertionError as e:
        content = {'msg': R['create']['failure']['invalid'].format(reason=e)}
        status = 422
    except Exception:
        content = {'msg': G['internal_error']}
        status = 500
    else:
        content = {
            'msg': R['create']['success'],
            'reservation': new_reservation.as_dict
        }
        status = 201

    # Not returned from a `finally:` block: that would swallow BaseExceptions
    # (e.g. KeyboardInterrupt) and mask them with an UnboundLocalError.
    return content, status
def interferes_with_reservations(
        job: Job,
        available_hosts_with_gpu_occupation: Dict[str, Dict],
        considered_future_period: timedelta = timedelta(0),
        allow_own: bool = True) -> bool:
    """Tell whether running `job` would collide with upcoming reservations.

    For every task of the job, the GPU assigned to it is looked up and the
    upcoming reservations of that GPU (within `considered_future_period`)
    are inspected.

    :param job: job whose tasks are checked
    :param available_hosts_with_gpu_occupation: passed through to
        Scheduler.get_assigned_gpu_uid
    :param considered_future_period: look-ahead window for reservations
    :param allow_own: when True, reservations whose user is the job's user
        do not count as interference; when False, any upcoming reservation does
    :returns: True as soon as one interfering reservation is found, else False
    """
    for task in job.tasks:
        gpu_uid = Scheduler.get_assigned_gpu_uid(
            task, available_hosts_with_gpu_occupation)
        upcoming = Reservation.upcoming_events_for_resource(
            gpu_uid, considered_future_period)

        if allow_own:
            # NOTE(review): users are compared by identity (`is not`), not by
            # equality - confirm both sides come from the same session.
            if any(reservation.user is not job.user
                   for reservation in upcoming):
                return True
        elif len(upcoming) > 0:
            return True
    return False
def test_update_future_reservation_start(tables, client, future_reservation,
                                         permissive_restriction):
    """PUT moving the start of `future_reservation` by one hour answers 201 and persists it."""
    permissive_restriction.save()
    future_reservation.save()

    shifted_start = future_reservation.start + timedelta(hours=1)
    payload = {
        'start': DateUtils.stringify_datetime_to_api_format(shifted_start)
    }
    url = ENDPOINT + '/' + str(future_reservation.id)
    response = client.put(url, headers=HEADERS, data=json.dumps(payload))
    body = json.loads(response.data.decode('utf-8'))

    assert response.status_code == HTTPStatus.CREATED
    assert body['reservation']['start'] == DateUtils.stringify_datetime(
        shifted_start)
    stored = Reservation.get(future_reservation.id)
    assert stored.start == shifted_start
def delete(id: ReservationId) -> Tuple[Content, HttpStatusCode]:
    """Delete the reservation with the given id.

    An owner may delete only a reservation that has not started yet;
    an admin may delete any reservation.

    :param id: identifier of the reservation to delete
    :returns: ``(content, status)`` - 200 on success, 403 when the caller is
        not privileged (or another AssertionError is raised), 404 when the
        reservation does not exist, 500 for any other ``Exception``.
    """
    try:
        reservation_to_destroy = Reservation.get(id)

        owner_may_delete = (
            reservation_to_destroy.start > datetime.datetime.utcnow()
            and __is_reservation_owner(reservation_to_destroy))
        if not (owner_may_delete or is_admin()):
            # Raised explicitly instead of using `assert`: assert statements
            # are removed under `python -O`, which would disable this
            # authorization check entirely.
            raise AssertionError(GENERAL['unprivileged'])

        reservation_to_destroy.destroy()
    except AssertionError as error_message:
        content, status = {'msg': str(error_message)}, 403
    except NoResultFound:
        # FIXME It is theoretically possible that User.get() could also raise this exception
        content, status = {'msg': RESERVATION['not_found']}, 404
    except Exception as e:
        content, status = {'msg': GENERAL['internal_error'] + str(e)}, 500
    else:
        content, status = {'msg': RESERVATION['delete']['success']}, 200

    # Not returned from a `finally:` block, so BaseExceptions propagate
    # instead of being swallowed.
    return content, status
def handle_expired_logs(self):
    '''
    Looks for JSON log files in self.log_dir that belong to expired reservations.
    For each of them the average GPU and memory utilization is stored in the
    reservation record and the log file is cleaned up. Log files that have no
    matching reservation are cleaned up as well.
    '''
    time_now = datetime.datetime.utcnow()

    # Only names starting with a digit and ending with .json, e.g. 10.json
    for log_path in self.log_dir.glob('[0-9]*.json'):
        if not log_path.is_file():
            continue
        try:
            log.debug('Processing file: {}'.format(log_path))
            reservation_id = int(log_path.stem)
            reservation = Reservation.get(id=reservation_id)

            if not reservation.ends_at < time_now:
                # Still running (or not started) - keep collecting
                continue

            log.debug('Reservation id={} has endend.'.format(reservation_id))

            # Generate and persist summary
            metrics = JSONLogFile(path=log_path).read()['metrics']
            reservation.gpu_util_avg = avg(metrics['gpu_util']['values'])
            reservation.mem_util_avg = avg(metrics['mem_util']['values'])
            log.debug('Saving summary...')
            reservation.save()

            # Clean up the log immediately
            self._clean_up_old_log_file(file=log_path)
        except NoResultFound:
            log.debug(
                'Log file for inexisting reservation has been found, cleaning up the file...'
            )
            self._clean_up_old_log_file(file=log_path)
        except Exception as e:
            log.debug(e)
def test_create_reservation_that_is_covered_by_two_separate_restrictions(
        tables, client, new_user):
    """A reservation spanning two back-to-back restrictions is accepted (201)."""
    resource_id = '0123456789012345678901234567890123456789'

    # Two restrictions; the second starts exactly where the first one ends
    first = Restriction(name='FirstRestriction',
                        starts_at='2101-01-01T00:00:00.000Z',
                        ends_at='2101-01-02T00:00:00.000Z',
                        is_global=False)
    second = Restriction(name='SecondRestriction',
                         starts_at='2101-01-02T00:00:00.000Z',
                         ends_at='2101-01-02T23:59:00.000Z',
                         is_global=False)

    new_user.save()
    first.apply_to_user(new_user)
    second.apply_to_user(new_user)

    gpu = Resource(id=resource_id)
    gpu.save()
    first.apply_to_resource(gpu)
    second.apply_to_resource(gpu)

    payload = {
        'title': 'Test reservation',
        'description': 'Test reservation',
        'resourceId': resource_id,
        'userId': new_user.id,
        'start': '2101-01-01T10:00:00.000Z',
        'end': '2101-01-02T12:00:00.000Z'
    }
    response = client.post(ENDPOINT, headers=HEADERS, data=json.dumps(payload))
    body = json.loads(response.data.decode('utf-8'))

    assert response.status_code == HTTPStatus.CREATED
    assert Reservation.get(body['reservation']['id']) is not None
def test_after_updating_restriction_reservations_that_are_no_longer_valid_should_get_cancelled(
        tables, client, new_user, restriction):
    """Moving a restriction's start past an existing reservation must cancel it."""
    new_user.save()

    # Create a restriction, assign user and resource to it
    restriction.starts_at = '2101-01-01T10:00:00.000Z'
    restriction.ends_at = '2101-01-05T10:00:00.000Z'
    restriction.apply_to_user(new_user)
    resource = Resource(id='0123456789012345678901234567890123456789')
    resource.save()
    restriction.apply_to_resource(resource)

    # Create a reservation in allowed timeframe (should succeed)
    data = {
        'title': 'Test reservation',
        'description': 'Test reservation',
        'resourceId': '0123456789012345678901234567890123456789',
        'userId': new_user.id,
        'start': '2101-01-02T10:00:00.000Z',
        'end': '2101-01-03T12:00:00.000Z'
    }
    resp = client.post(ENDPOINT, headers=HEADERS, data=json.dumps(data))
    resp_json = json.loads(resp.data.decode('utf-8'))
    reservation = Reservation.get(resp_json['reservation']['id'])
    assert reservation.is_cancelled is False

    # Update the restriction to make the reservation invalid.
    # The URL must address the restriction by its own id; using the
    # reservation's id only worked while both ids happened to be equal.
    data = {'startsAt': '2101-01-04T09:00:00.000Z'}
    resp = client.put(BASE_URI + '/restrictions/' + str(restriction.id),
                      headers=HEADERS,
                      data=json.dumps(data))

    assert resp.status_code == HTTPStatus.OK
    assert reservation.is_cancelled is True
def test_create_reservation_starting_in_the_past(tables, client, new_user,
                                                 permissive_restriction):
    """POSTing a reservation whose start is two minutes old is expected to answer 201."""
    resource_id = '0123456789012345678901234567890123456789'
    new_user.save()

    # Create the resource that will be reserved
    gpu = Resource(id=resource_id)
    gpu.save()

    # NOTE(review): local time (`now()`) is used here - confirm this matches
    # the time base the API compares against.
    starts = datetime.datetime.now() - timedelta(minutes=2)
    ends = starts + timedelta(hours=1)
    payload = {
        'title': 'Test reservation',
        'description': 'Test reservation',
        'resourceId': resource_id,
        'userId': new_user.id,
        'start': DateUtils.stringify_datetime_to_api_format(starts),
        'end': DateUtils.stringify_datetime_to_api_format(ends)
    }
    response = client.post(ENDPOINT, headers=HEADERS, data=json.dumps(payload))
    body = json.loads(response.data.decode('utf-8'))

    assert response.status_code == HTTPStatus.CREATED
    assert Reservation.get(body['reservation']['id']) is not None
def test_create_reservation(tables, client, new_user, permissive_restriction):
    """POSTing a one-hour reservation starting now answers 201 and stores the record."""
    resource_id = '0123456789012345678901234567890123456789'
    new_user.save()

    # Create the resource that will be reserved
    gpu = Resource(id=resource_id)
    gpu.save()

    # Reserve a period the user has access to, as specified by the
    # restriction. Should succeed.
    starts = datetime.datetime.now()
    payload = {
        'title': 'Test reservation',
        'description': 'Test reservation',
        'resourceId': resource_id,
        'userId': new_user.id,
        'start': DateUtils.stringify_datetime_to_api_format(starts),
        'end': DateUtils.stringify_datetime_to_api_format(
            starts + timedelta(hours=1))
    }
    response = client.post(ENDPOINT, headers=HEADERS, data=json.dumps(payload))
    body = json.loads(response.data.decode('utf-8'))

    assert response.status_code == HTTPStatus.CREATED
    assert Reservation.get(body['reservation']['id']) is not None
def get_all():
    """Return every reservation in serialized form together with HTTP status 200."""
    all_reservations = Reservation.all()
    serialized = [item.as_dict for item in all_reservations]
    return serialized, 200
def get_all() -> Tuple[List[Any], HttpStatusCode]:
    """Return every reservation as a dict together with HTTP status 200."""
    all_reservations = Reservation.all()
    serialized = [item.as_dict() for item in all_reservations]
    return serialized, 200
def do_run(self):
    '''Run one protection pass over all nodes, then wait for the next interval.

    For every GPU on every host the processes found on it are compared with
    the GPU's current reservation:
      * with a current reservation, every process whose owner is not the
        reservation's user is stored as a violation,
      * without one, and only when self.strict_reservations is set, every
        process is stored as a violation.
    Collected violations are enriched (SSH connections, GPU and owner
    summaries) and passed to each handler in self.violation_handlers;
    a failing handler is logged and does not stop the others.
    '''
    time_func = time.perf_counter
    start_time = time_func()

    current_infrastructure = \
        self.infrastructure_manager.all_nodes_with_gpu_processes()
    for hostname in current_infrastructure:
        violations = {}  # type: Dict[str, Dict]
        for gpu_id in current_infrastructure[hostname]:
            processes = current_infrastructure[hostname][gpu_id]
            if self.strict_reservations or (processes is not None
                                            and len(processes)):
                # In strict mode this branch is entered even when `processes`
                # is None; iterate an empty list then instead of raising
                # TypeError.
                gpu_processes = processes or []

                current_gpu_reservations = Reservation.current_events(gpu_id)
                reservation = None
                if len(current_gpu_reservations):
                    reservation = current_gpu_reservations[0]
                    if hostname is None or reservation.user is None:
                        continue

                    for process in gpu_processes:
                        if process['owner'] != reservation.user.username:
                            self.store_violation(violations, process,
                                                 hostname, reservation, gpu_id)
                elif self.strict_reservations:
                    # No reservation at all: every process is a violation
                    for process in gpu_processes:
                        self.store_violation(violations, process, hostname,
                                             reservation, gpu_id)

        for intruder in violations:
            violation_data = violations[intruder]
            reservations = violation_data['RESERVATIONS']
            hostnames = set([
                reservation_data['HOSTNAME']
                for reservation_data in reservations
            ])
            violation_data['SSH_CONNECTIONS'] = {
                hostname: self.connection_manager.single_connection(hostname)
                for hostname in hostnames
            }
            violation_data['GPUS'] = ',\n'.join([
                '{} - GPU{}: {}'.format(data['HOSTNAME'], data['GPU_ID'],
                                        data['GPU_NAME'])
                for data in reservations
            ])
            violation_data['OWNERS'] = ', '.join([
                '{} ({})'.format(data['OWNER_USERNAME'], data['OWNER_EMAIL'])
                for data in reservations
            ])

            for handler in self.violation_handlers:
                try:
                    handler.trigger_action(violation_data)
                except Exception as e:
                    log.warning('Error in violation handler: {}'.format(e))

    end_time = time_func()
    execution_time = end_time - start_time

    # Hold on until next interval
    if execution_time < self.interval:
        gevent.sleep(self.interval - execution_time)
    waiting_time = time_func() - end_time
    total_time = execution_time + waiting_time
    log.debug(
        'ProtectionService loop took: {:.2f}s (waiting {:.2f}) = {:.2f}'.
        format(execution_time, waiting_time, total_time))
def do_run(self):
    '''Run one protection pass over all current reservations, then wait for
    the next interval.

    For each current reservation the users owning processes on the reserved
    GPU are collected; every such user other than the reservation's owner is
    reported to each handler in self.violation_handlers.
    '''
    time_func = time.perf_counter
    start_time = time_func()

    # 1. Get list of current reservations
    current_reservations = Reservation.current_events()

    # FIXME DEBUG ONLY
    log.debug(
        json.dumps([r.as_dict() for r in current_reservations], indent=4))

    for reservation in current_reservations:
        # 1. Extract reservation info
        uuid = reservation.resource_id
        hostname = self.find_hostname(uuid)
        user = User.get(reservation.user_id)
        username = user.username
        if hostname is None or username is None:
            log.warning(
                'Unable to process the reservation ({}@{}), skipping...'.
                format(username, hostname))
            continue

        # 2. Establish connection to node and find all tty sessions
        node_connection = self.connection_manager.single_connection(
            hostname)
        node_sessions = self.node_tty_sessions(node_connection)
        node_processes = self.node_gpu_processes(hostname)
        # presumably the usernames owning processes on GPU `uuid` - TODO confirm
        reserved_gpu_process_owners = self.gpu_users(node_processes, uuid)

        is_unprivileged = lambda sess: sess[
            'USER'] in reserved_gpu_process_owners
        # NOTE(review): this list is built BEFORE the reservation owner is
        # removed from reserved_gpu_process_owners below, so it also keeps the
        # owner's own sessions when the owner is in that list, and it is
        # shared by all intruders - confirm this is what the handlers expect.
        intruder_ttys = [
            sess for sess in node_sessions if is_unprivileged(sess)
        ]

        try:
            # The privileged user (reservation owner) is dropped from the
            # list in place
            reserved_gpu_process_owners.remove(username)
        except ValueError:
            # The owner has no process on the GPU - nothing to remove
            pass
        finally:
            # Same list object, now without the reservation owner
            unprivileged_gpu_process_owners = reserved_gpu_process_owners

        # 3. Execute protection handlers
        for intruder in unprivileged_gpu_process_owners:
            violation_data = {
                'INTRUDER_USERNAME': intruder,
                'RESERVATION_OWNER_USERNAME': username,
                'RESERVATION_OWNER_EMAIL': user.email,
                'RESERVATION_END': utc2local(reservation.end),
                'UUID': uuid,
                'GPU_NAME': self.gpu_attr(hostname, uuid, attribute='name'),
                'GPU_ID': self.gpu_attr(hostname, uuid, attribute='index'),
                'HOSTNAME': hostname,
                'TTY_SESSIONS': intruder_ttys,
                'SSH_CONNECTION': node_connection
            }
            # NOTE(review): a handler that raises aborts the whole pass,
            # there is no try/except around trigger_action here.
            for handler in self.violation_handlers:
                handler.trigger_action(violation_data)

    end_time = time_func()
    execution_time = end_time - start_time

    # Hold on until next interval
    if execution_time < self.interval:
        gevent.sleep(self.interval - execution_time)
    # Time actually spent sleeping (close to zero when no sleep was needed)
    waiting_time = time_func() - end_time
    total_time = execution_time + waiting_time
    log.debug(
        'ProtectionService loop took: {:.2f}s (waiting {:.2f}) = {:.2f}'.
        format(execution_time, waiting_time, total_time))