def get(self, request_id):
    """Return a status summary (counts, stats, optional per-file details)
    for a transform request.

    Responds 401 when user validation fails and 404 when the request id
    is unknown.
    """
    is_auth, auth_reject_message = self._validate_user()
    if not is_auth:
        return {'message': f'Authentication Failed: {str(auth_reject_message)}'}, 401

    submitted_request = TransformRequest.return_request(request_id)
    if not submitted_request:
        # Integer status code (was the string "404"), consistent with the
        # integer 401 returned above.
        return "Transform Not Found", 404

    status_request = status_request_parser.parse_args()
    count = TransformationResult.count(request_id)
    stats = TransformationResult.statistics(request_id)
    failures = TransformationResult.failed_files(request_id)
    # Query once instead of twice (the original also issued a second
    # query just for a debug print, which has been removed).
    files_remaining = TransformRequest.files_remaining(request_id)

    result_dict = {
        "status": submitted_request.status,
        "request-id": request_id,
        "files-processed": count - failures,
        "files-skipped": failures,
        "files-remaining": files_remaining,
        "stats": stats
    }

    # Per-file status rows are only included when explicitly requested.
    if status_request.details:
        result_dict['details'] = TransformationResult.to_json_list(
            TransformationResult.get_all_status(request_id))

    return jsonify(result_dict)
def get(self, request_id=None):
    """Return one transform request as JSON, or every request when no
    id is supplied."""
    if not request_id:
        return TransformRequest.return_all()
    record = TransformRequest.return_request(request_id)
    return TransformRequest.to_json(record)
def report_fileset_complete(self, submitted_request, num_files,
                            num_skipped=0, total_events=0, total_bytes=0,
                            did_lookup_time=0):
    """Copy the final fileset totals onto the request record and persist it."""
    totals = {
        'files': num_files,
        'files_skipped': num_skipped,
        'total_events': total_events,
        'total_bytes': total_bytes,
        'did_lookup_time': did_lookup_time,
    }
    for attr, value in totals.items():
        setattr(submitted_request, attr, value)
    TransformRequest.update_request(submitted_request)
def post(self, request_id):
    """Mark a request as Running and launch its transformer jobs.

    When the result destination is Kafka, the output topic is created
    first so the transformers have somewhere to write.
    """
    from servicex.kafka_topic_manager import KafkaTopicManager

    submitted_request = TransformRequest.return_request(request_id)
    submitted_request.status = 'Running'
    submitted_request.save_to_db()
    db.session.commit()

    if not current_app.config['TRANSFORMER_MANAGER_ENABLED']:
        return

    if submitted_request.result_destination == 'kafka':
        # Set up the kafka topic with the correct number of partitions
        # and max message size before any transformer starts producing.
        topic_manager = KafkaTopicManager(submitted_request.kafka_broker)
        topic_manager.create_topic(request_id,
                                   max_message_size=1920000,
                                   num_partitions=100)

    config = current_app.config
    self.transformer_manager.launch_transformer_jobs(
        image=submitted_request.image,
        request_id=request_id,
        workers=submitted_request.workers,
        chunk_size=submitted_request.chunk_size,
        rabbitmq_uri=config['TRANSFORMER_RABBIT_MQ_URL'],
        namespace=config['TRANSFORMER_NAMESPACE'],
        x509_secret=config['TRANSFORMER_X509_SECRET'],
        generated_code_cm=submitted_request.generated_code_cm,
        result_destination=submitted_request.result_destination,
        result_format=submitted_request.result_format,
        kafka_broker=submitted_request.kafka_broker)
def put(self, request_id):
    """Register a newly located file against a transform request.

    Persists a DatasetFile row, hands it to the lookup result processor,
    and (when configured) records the "located" status in Elasticsearch.
    Returns a 500 with the exception text on any failure.
    """
    try:
        from servicex.models import db
        file_info = request.get_json()
        submitted_request = TransformRequest.return_request(request_id)
        db_record = DatasetFile(request_id=request_id,
                                file_path=file_info['file_path'],
                                adler32=file_info['adler32'],
                                file_events=file_info['file_events'],
                                file_size=file_info['file_size'])
        self.lookup_result_processor.add_file_to_dataset(submitted_request,
                                                         db_record)

        if self.elasticsearch_adaptor:
            status_record = self._generate_file_status_record(db_record,
                                                              "located")
            self.elasticsearch_adaptor.create_update_path(
                db_record.get_path_id(), status_record)

        db.session.commit()
        return {
            "request-id": str(request_id),
            "file-id": db_record.id
        }
    except Exception:
        exc_type, exc_value, exc_traceback = sys.exc_info()
        traceback.print_tb(exc_traceback, limit=20, file=sys.stdout)
        print(exc_value)
        return {'message': 'Something went wrong: ' + str(exc_value)}, 500
def put(self, request_id):
    """Record a transformer's per-file result and close out the request
    when no files remain.

    Saves a TransformationResult row, mirrors the status to Elasticsearch
    (when configured), and when the remaining-file count reaches zero
    shuts down the transformer jobs and marks the request Complete.
    """
    info = request.get_json()
    submitted_request = TransformRequest.return_request(request_id)
    dataset_file = DatasetFile.get_by_id(info['file-id'])
    # One result row per transformed file, keyed to both the dataset
    # file and the owning request.
    rec = TransformationResult(
        did=submitted_request.did,
        file_id=dataset_file.id,
        request_id=request_id,
        file_path=info['file-path'],
        transform_status=info['status'],
        transform_time=info['total-time'],
        total_bytes=info['total-bytes'],
        total_events=info['total-events'],
        avg_rate=info['avg-rate'],
        messages=info['num-messages']
    )
    rec.save_to_db()

    if self.elasticsearch_adapter:
        # Mirror both the file-level status and the request-level
        # "transforming" state to Elasticsearch.
        self.elasticsearch_adapter.create_update_path(
            dataset_file.get_path_id(),
            self._generate_file_status_record(dataset_file, info['status']))
        self.elasticsearch_adapter.create_update_request(
            request_id,
            self._generate_transformation_record(submitted_request,
                                                 'transforming'))

    # files_remaining may be None (e.g. fileset total not reported yet),
    # which must not trigger completion.
    files_remaining = TransformRequest.files_remaining(request_id)
    if files_remaining is not None and files_remaining <= 0:
        namespace = current_app.config['TRANSFORMER_NAMESPACE']
        print("Job is all done... shutting down transformers")
        self.transformer_manager.shutdown_transformer_job(request_id,
                                                          namespace)
        submitted_request.status = "Complete"
        submitted_request.save_to_db()
        if self.elasticsearch_adapter:
            self.elasticsearch_adapter.create_update_request(
                request_id,
                self._generate_transformation_record(submitted_request,
                                                     'complete'))
    # NOTE(review): debug print of the incoming payload — consider a
    # logger instead.
    print(info)
    db.session.commit()
    return "Ok"
def test_submit_transformation_with_root_file(self, mocker,
                                              mock_rabbit_adaptor,
                                              mock_code_gen_service,
                                              mock_docker_repo_adapter):
    """Submitting an xAOD/root-file request persists the record, sets up
    the RabbitMQ queues/bindings, and publishes a DID lookup message."""
    # The code-gen service is stubbed so the saved request records the
    # generated-code config map name 'my-cm'.
    mock_code_gen_service.generate_code_for_selection = mocker.Mock(
        return_value='my-cm')
    request = self._generate_transformation_request_xAOD_root_file()

    client = self._test_client(
        rabbit_adaptor=mock_rabbit_adaptor,
        code_gen_service=mock_code_gen_service,
        docker_repo_adapter=mock_docker_repo_adapter)

    response = client.post('/servicex/transformation', json=request)
    assert response.status_code == 200
    request_id = response.json['request_id']
    # The saved record must reflect the submitted request fields.
    with client.application.app_context():
        saved_obj = TransformRequest.return_request(request_id)
        assert saved_obj
        assert saved_obj.did == '123-45-678'
        assert saved_obj.request_id == request_id
        assert saved_obj.columns is None
        assert saved_obj.selection == 'test-string'
        assert saved_obj.image == 'ssl-hep/func_adl:latest'
        assert saved_obj.chunk_size is None
        assert saved_obj.workers == 10
        assert saved_obj.result_destination == 'object-store'
        assert saved_obj.result_format == 'root-file'
        assert saved_obj.generated_code_cm == 'my-cm'

    # A work queue plus an "_errors" dead-letter queue per request.
    setup_queue_calls = [call(request_id), call(request_id + "_errors")]
    mock_rabbit_adaptor.setup_queue.assert_has_calls(setup_queue_calls)

    # Exact bindings, in order, to the request and failure exchanges.
    bind_to_exchange_calls = [
        call(exchange="transformation_requests", queue=request_id),
        call(exchange="transformation_failures",
             queue=request_id + "_errors"),
    ]
    assert mock_rabbit_adaptor.bind_queue_to_exchange.call_args_list == \
        bind_to_exchange_calls

    # The DID lookup message advertises a callback endpoint for this
    # transformation.
    service_endpoint = \
        "http://cern.analysis.ch:5000/servicex/internal/transformation/" + \
        request_id
    mock_rabbit_adaptor. \
        basic_publish.assert_called_with(exchange='',
                                         routing_key='did_requests',
                                         body=json.dumps(
                                             {"request_id": request_id,
                                              "did": "123-45-678",
                                              "service-endpoint":
                                                  service_endpoint}))
def put(self, request_id):
    """Record the final fileset summary reported by the DID finder."""
    summary = request.get_json()
    transform_req = TransformRequest.return_request(request_id)
    self.lookup_result_processor.report_fileset_complete(
        transform_req,
        num_files=summary['files'],
        num_skipped=summary['files-skipped'],
        total_events=summary['total-events'],
        total_bytes=summary['total-bytes'],
        did_lookup_time=summary['elapsed-time'])
    db.session.commit()
def get(self, request_id):
    """Return a status summary (counts, stats, optional per-file details)
    for a transform request."""
    status_request = status_request_parser.parse_args()
    count = TransformationResult.count(request_id)
    stats = TransformationResult.statistics(request_id)
    failures = TransformationResult.failed_files(request_id)
    # Query once instead of twice (the original also issued a second
    # query just for a debug print, which has been removed).
    files_remaining = TransformRequest.files_remaining(request_id)

    result_dict = {
        "request-id": request_id,
        "files-processed": count - failures,
        "files-skipped": failures,
        "files-remaining": files_remaining,
        "stats": stats
    }

    # Per-file status rows are only included when explicitly requested.
    if status_request.details:
        result_dict['details'] = TransformationResult.to_json_list(
            TransformationResult.get_all_status(request_id))

    return jsonify(result_dict)
def get(self, request_id=None):
    """Return one transform request (with MinIO connection details when
    its results live in the object store) or every request when no id
    is supplied. Responds 401 when user validation fails."""
    is_auth, auth_reject_message = self._validate_user()
    if not is_auth:
        return {
            'message': f'Authentication Failed: {str(auth_reject_message)}'
        }, 401

    if not request_id:
        return TransformRequest.return_all()

    request_rec = TransformRequest.to_json(
        TransformRequest.return_request(request_id))

    config = current_app.config
    uses_object_store = (request_rec['result-destination'] ==
                         TransformRequest.OBJECT_STORE_DEST)
    if config['OBJECT_STORE_ENABLED'] and uses_object_store:
        # Hand back MinIO credentials so the caller can fetch results
        # directly from the object store.
        request_rec['minio-endpoint'] = config['MINIO_PUBLIC_URL']
        request_rec['minio-access-key'] = config['MINIO_ACCESS_KEY']
        request_rec['minio-secret-key'] = config['MINIO_SECRET_KEY']
    return request_rec
def test_submit_transformation(self, mock_rabbit_adaptor,
                               mock_docker_repo_adapter):
    """Submitting a kafka-destination request persists the record, sets
    up the RabbitMQ queues/bindings, and publishes a DID lookup."""
    client = self._test_client(
        rabbit_adaptor=mock_rabbit_adaptor,
        docker_repo_adapter=mock_docker_repo_adapter)

    response = client.post('/servicex/transformation',
                           json=self._generate_transformation_request())
    assert response.status_code == 200
    request_id = response.json['request_id']

    # The saved record must reflect the submitted request fields.
    with client.application.app_context():
        stored = TransformRequest.return_request(request_id)
        assert stored
        assert stored.did == '123-45-678'
        assert stored.request_id == request_id
        assert stored.columns == "e.e, e.p"
        assert stored.image == 'ssl-hep/foo:latest'
        assert stored.chunk_size == 500
        assert stored.workers == 10
        assert stored.result_destination == 'kafka'
        assert stored.kafka_broker == "ssl.hep.kafka:12332"

    # A work queue plus an "_errors" dead-letter queue per request.
    error_queue = request_id + "_errors"
    mock_rabbit_adaptor.setup_queue.assert_has_calls(
        [call(request_id), call(error_queue)])

    expected_bindings = [
        call(exchange="transformation_requests", queue=request_id),
        call(exchange="transformation_failures", queue=error_queue),
    ]
    assert mock_rabbit_adaptor.bind_queue_to_exchange.call_args_list == \
        expected_bindings

    # The DID lookup message advertises a callback endpoint for this
    # transformation.
    service_endpoint = (
        "http://cern.analysis.ch:5000/servicex/internal/transformation/"
        + request_id)
    expected_body = json.dumps({"request_id": request_id,
                                "did": "123-45-678",
                                "service-endpoint": service_endpoint})
    mock_rabbit_adaptor.basic_publish.assert_called_with(
        exchange='',
        routing_key='did_requests',
        body=expected_body)
def get(self, request_id):
    """List transform failures (pod name, file path, event count, error
    info) for a request.

    Responds 401 when user validation fails and 404 when the request id
    is unknown.
    """
    is_auth, auth_reject_message = self._validate_user()
    if not is_auth:
        return {
            'message': f'Authentication Failed: {str(auth_reject_message)}'
        }, 401

    submitted_request = TransformRequest.return_request(request_id)
    if not submitted_request:
        # Integer status code (was the string "404"), consistent with the
        # integer 401 returned above.
        return "Transform Not Found", 404

    # Each failure row pairs a DatasetFile (result[0]) with its
    # FileStatus (result[1]).
    results = [{
        "pod-name": result[1].pod_name,
        "file": result[0].file_path,
        "events": result[0].file_events,
        "info": result[1].info
    } for result in FileStatus.failures_for_request(request_id)]

    # results is already a list; the redundant list() copy was dropped.
    return {"errors": results}
def post(self, request_id):
    """Publish a preflight transform of a single file for this request.

    Returns a 500 on any failure while publishing.
    """
    body = request.get_json()
    submitted_request = TransformRequest.return_request(request_id)
    try:
        self.lookup_result_processor.publish_preflight_request(
            submitted_request, body['file_path'])
        # NOTE(review): file-id is a hard-coded placeholder here —
        # presumably preflight files are not tracked as dataset rows;
        # confirm with the caller's expectations.
        return {
            "request-id": str(request_id),
            "file-id": 42
        }
    except Exception:
        exc_type, exc_value, exc_traceback = sys.exc_info()
        traceback.print_tb(exc_traceback, limit=20, file=sys.stdout)
        print(exc_value)
        return {'message': 'Something went wrong'}, 500
def test_submit_transformation_with_object_store(self, mocker,
                                                 mock_rabbit_adaptor,
                                                 mock_docker_repo_adapter):
    """An object-store destination creates a result bucket and persists
    the destination and format on the request record."""
    from servicex import ObjectStoreManager

    extra_config = {
        'OBJECT_STORE_ENABLED': True,
        'MINIO_URL': 'localhost:9000',
        'MINIO_ACCESS_KEY': 'miniouser',
        'MINIO_SECRET_KEY': 'leftfoot1'
    }
    submit_body = {
        'did': '123-45-678',
        'columns': "e.e, e.p",
        'image': 'ssl-hep/foo:latest',
        'result-destination': 'object-store',
        'result-format': 'parquet',
        'chunk-size': 500,
        'workers': 10
    }
    object_store_mock = mocker.MagicMock(ObjectStoreManager)
    client = self._test_client(
        additional_config=extra_config,
        rabbit_adaptor=mock_rabbit_adaptor,
        object_store=object_store_mock,
        docker_repo_adapter=mock_docker_repo_adapter)

    response = client.post('/servicex/transformation', json=submit_body)
    assert response.status_code == 200
    request_id = response.json['request_id']
    # One bucket per request, named after the request id.
    object_store_mock.create_bucket.assert_called_with(request_id)

    with client.application.app_context():
        stored = TransformRequest.return_request(request_id)
        assert stored
        assert stored.result_destination == 'object-store'
        assert stored.result_format == 'parquet'
def post(self, request_id):
    """Record a status report from a transformer.

    A "fatal" severity prints a banner, marks the request Fatal with the
    reported description, and persists it; any other severity is only
    echoed to stdout.
    """
    status = status_parser.parse_args()
    status.request_id = request_id
    if status.severity == "fatal":
        print("+--------------------------------------------+")
        # NOTE(review): banner whitespace reconstructed from a collapsed
        # source — verify against the original layout.
        print(r"""
 ______       _______       _        ______ _____  _____   ____  _____
|  ____/\    |__   __|/\   | |      |  ____|  __ \|  __ \ / __ \|  __ \
| |__ /  \      | |  /  \  | |      | |__  | |__) | |__) | |  | | |__) |
|  __/ /\ \     | | / /\ \ | |      |  __| |  _  /|  _  /| |  | |  _  /
| | / ____ \    | |/ ____ \| |____  | |____| | \ \| | \ \| |__| | | \ \
|_|/_/    \_\   |_/_/    \_\______| |______|_|  \_\_|  \_\\____/|_|  \_\
""")
        print(f"+ Fatal error reported for {request_id} from {status.source}")
        print(status.info)
        print("+--------------------------------------------+")
        submitted_request = TransformRequest.return_request(request_id)
        submitted_request.status = 'Fatal'
        submitted_request.failure_description = status.info
        submitted_request.save_to_db()
        db.session.commit()
    else:
        # Non-fatal reports are not persisted, only logged to stdout.
        print(status)
def _generate_transform_request():
    """Build a fully populated TransformRequest fixture for tests."""
    req = TransformRequest()
    # Identity and timing.
    req.request_id = 'BR549'
    req.submit_time = 1000
    req.status = "Submitted"
    # Input dataset and selection.
    req.did = '123-456-789'
    req.columns = 'electron.eta(), muon.pt()'
    req.tree_name = 'Events'
    # Execution parameters.
    req.image = 'ssl-hep/foo:latest'
    req.chunk_size = 1000
    req.workers = 42
    # Output destination.
    req.result_destination = 'kafka'
    req.result_format = 'arrow'
    req.kafka_broker = 'http://ssl-hep.org.kafka:12345'
    # Reported totals.
    req.total_events = 10000
    req.total_bytes = 1203
    return req
def post(self):
    """Accept a new transformation request.

    Validates that exactly one of `did` / `file-list` was supplied,
    provisions an object-store bucket when needed, persists the
    TransformRequest, sets up its RabbitMQ queues, and either publishes
    a DID lookup message or registers the supplied file list directly.
    Returns {"request_id": ...} on success, 400 for malformed requests,
    500 for anything unexpected.
    """
    try:
        transformation_request = parser.parse_args()
        print("object store ", self.object_store)

        request_id = str(uuid.uuid4())
        time = datetime.now(tz=timezone.utc)

        requested_did = transformation_request['did'] \
            if 'did' in transformation_request else None
        requested_file_list = transformation_request['file-list'] \
            if 'file-list' in transformation_request else None

        # requested_did xor requested_file_list: exactly one source of
        # input files may be given.
        if bool(requested_did) == bool(requested_file_list):
            raise BadRequest("Must provide did or file-list but not both")

        if self.object_store and \
                transformation_request['result-destination'] == 'object-store':
            self.object_store.create_bucket(request_id)
            # NOTE(review): what happens if result-destination is
            # object-store but self.object_store is None? No bucket is
            # created and no error is raised here.

        if transformation_request['result-destination'] == 'kafka':
            broker = transformation_request['kafka']['broker']
        else:
            broker = None

        request_rec = TransformRequest(
            did=requested_did if requested_did else "File List Provided in Request",
            submit_time=time,
            columns=transformation_request['columns'],
            selection=transformation_request['selection'],
            tree_name=transformation_request['tree-name'],
            request_id=str(request_id),
            image=transformation_request['image'],
            chunk_size=transformation_request['chunk-size'],
            result_destination=transformation_request['result-destination'],
            result_format=transformation_request['result-format'],
            kafka_broker=broker,
            workers=transformation_request['workers'],
            workflow_name=_workflow_name(transformation_request))

        # If we are doing the xaod_cpp workflow, then the first thing to
        # do is make sure the requested selection is correct, and
        # generate the C++ files.
        if request_rec.workflow_name == 'selection_codegen':
            namespace = current_app.config['TRANSFORMER_NAMESPACE']
            request_rec.generated_code_cm = \
                self.code_gen_service.generate_code_for_selection(request_rec,
                                                                  namespace)

        # Create queue for transformers to read from.
        self.rabbitmq_adaptor.setup_queue(request_id)
        self.rabbitmq_adaptor.bind_queue_to_exchange(
            exchange="transformation_requests",
            queue=request_id)

        # Also set up an error queue for dead letters generated by the
        # transformer.
        self.rabbitmq_adaptor.setup_queue(request_id + "_errors")
        self.rabbitmq_adaptor.bind_queue_to_exchange(
            exchange="transformation_failures",
            queue=request_id + "_errors")

        request_rec.save_to_db()

        if requested_did:
            # Hand the DID to the lookup service along with a callback
            # endpoint for reporting located files.
            did_request = {
                "request_id": request_rec.request_id,
                "did": request_rec.did,
                "service-endpoint": self._generate_advertised_endpoint(
                    "servicex/transformation/" + request_rec.request_id)
            }
            self.rabbitmq_adaptor.basic_publish(
                exchange='',
                routing_key='did_requests',
                body=json.dumps(did_request))
        else:
            # Request a preflight check on the first file.
            self.lookup_result_processor.publish_preflight_request(
                request_rec, requested_file_list[0])

            for file_path in requested_file_list:
                file_record = DatasetFile(request_id=request_id,
                                          file_path=file_path,
                                          adler32="xxx",
                                          file_events=0,
                                          file_size=0)
                self.lookup_result_processor.add_file_to_dataset(
                    request_rec, file_record)

            self.lookup_result_processor.report_fileset_complete(
                request_rec, num_files=len(requested_file_list))

            db.session.commit()

        if self.elasticsearch_adapter:
            self.elasticsearch_adapter.create_update_request(
                request_id, self._generate_transformation_record(
                    request_rec, "locating DID"))

        return {"request_id": str(request_id)}

    except BadRequest as bad_request:
        return {
            'message': f'The json request was malformed: {str(bad_request)}'
        }, 400
    except ValueError as eek:
        return {
            'message': f'Failed to submit transform request: {str(eek)}'
        }, 400
    except Exception:
        exc_type, exc_value, exc_traceback = sys.exc_info()
        traceback.print_tb(exc_traceback, limit=20, file=sys.stdout)
        print(exc_value)
        return {'message': 'Something went wrong'}, 500
def _generate_test_request(self):
    """Minimal TransformRequest fixture: just an id and a selection."""
    req = TransformRequest()
    req.request_id = "462-33"
    req.selection = "test-string"
    return req