예제 #1
0
    def get(self, request_id):
        """Report processing progress for a single transformation request.

        Args:
            request_id: Identifier of the transformation request to inspect.

        Returns:
            ``(message dict, 401)`` when ``self._validate_user()`` rejects the
            caller, ``("Transform Not Found", 404)`` when no request record is
            found, otherwise the ``jsonify``-ed status dictionary. Per-file
            details are added under ``details`` when the parsed status
            arguments carry a truthy ``details`` value.
        """
        is_auth, auth_reject_message = self._validate_user()
        if not is_auth:
            return {'message': f'Authentication Failed: {str(auth_reject_message)}'}, 401

        submitted_request = TransformRequest.return_request(request_id)
        if not submitted_request:
            # Integer status, matching the 401 above, instead of the bare
            # status string "404".
            return "Transform Not Found", 404

        status_request = status_request_parser.parse_args()

        count = TransformationResult.count(request_id)
        stats = TransformationResult.statistics(request_id)
        failures = TransformationResult.failed_files(request_id)
        # Query the remaining-file count once so the value written to stdout
        # and the value sent to the client cannot disagree.
        files_remaining = TransformRequest.files_remaining(request_id)
        print(count, stats)
        print(files_remaining)
        result_dict = {
            "status": submitted_request.status,
            "request-id": request_id,
            "files-processed": count - failures,
            "files-skipped": failures,
            "files-remaining": files_remaining,
            "stats": stats
        }

        if status_request.details:
            result_dict['details'] = TransformationResult.to_json_list(
                TransformationResult.get_all_status(request_id))

        return jsonify(result_dict)
예제 #2
0
 def get(self, request_id=None):
     """Return one transformation request as JSON, or every request.

     With a truthy ``request_id`` the record from
     ``TransformRequest.return_request`` is passed through
     ``TransformRequest.to_json``; otherwise the result of
     ``TransformRequest.return_all()`` is returned.
     """
     if not request_id:
         return TransformRequest.return_all()

     matching_request = TransformRequest.return_request(request_id)
     return TransformRequest.to_json(matching_request)
 def report_fileset_complete(self, submitted_request,
                             num_files, num_skipped=0, total_events=0,
                             total_bytes=0, did_lookup_time=0):
     """Store the dataset lookup summary on a request and persist it.

     Args:
         submitted_request: Request record whose summary fields are set.
         num_files: Value stored in ``files``.
         num_skipped: Value stored in ``files_skipped``.
         total_events: Value stored in ``total_events``.
         total_bytes: Value stored in ``total_bytes``.
         did_lookup_time: Value stored in ``did_lookup_time``.
     """
     # (attribute name, value) pairs, applied in this order.
     summary_fields = (
         ('files', num_files),
         ('files_skipped', num_skipped),
         ('total_events', total_events),
         ('total_bytes', total_bytes),
         ('did_lookup_time', did_lookup_time),
     )
     for field_name, field_value in summary_fields:
         setattr(submitted_request, field_name, field_value)

     TransformRequest.update_request(submitted_request)
예제 #4
0
    def post(self, request_id):
        """Mark a request as running and, when enabled, launch its transformers.

        The request status is set to ``'Running'`` and committed first. If the
        ``TRANSFORMER_MANAGER_ENABLED`` config entry is truthy, a Kafka topic
        named after the request is created for ``'kafka'`` destinations, and
        the transformer jobs are launched through ``self.transformer_manager``.

        Args:
            request_id: Identifier of the transformation request to start.
        """
        from servicex.kafka_topic_manager import KafkaTopicManager

        running_request = TransformRequest.return_request(request_id)
        running_request.status = 'Running'
        running_request.save_to_db()
        db.session.commit()

        if not current_app.config['TRANSFORMER_MANAGER_ENABLED']:
            return

        if running_request.result_destination == 'kafka':
            # Setup the kafka topic with the correct number of partitions and
            # max message size
            max_message_size = 1920000
            topic_manager = KafkaTopicManager(running_request.kafka_broker)
            topic_manager.create_topic(request_id,
                                       max_message_size=max_message_size,
                                       num_partitions=100)

        rabbitmq_uri = current_app.config['TRANSFORMER_RABBIT_MQ_URL']
        namespace = current_app.config['TRANSFORMER_NAMESPACE']
        x509_secret = current_app.config['TRANSFORMER_X509_SECRET']
        generated_code_cm = running_request.generated_code_cm

        self.transformer_manager.launch_transformer_jobs(
            image=running_request.image,
            request_id=request_id,
            workers=running_request.workers,
            chunk_size=running_request.chunk_size,
            rabbitmq_uri=rabbitmq_uri,
            namespace=namespace,
            x509_secret=x509_secret,
            generated_code_cm=generated_code_cm,
            result_destination=running_request.result_destination,
            result_format=running_request.result_format,
            kafka_broker=running_request.kafka_broker)
예제 #5
0
    def put(self, request_id):
        """Register one located file against a transformation request.

        Builds a ``DatasetFile`` from the JSON body (``file_path``,
        ``adler32``, ``file_events``, ``file_size``), hands it to
        ``self.lookup_result_processor.add_file_to_dataset``, records a
        "located" status through the Elasticsearch adaptor when one is set,
        and commits the session.

        Args:
            request_id: Identifier of the request the file belongs to.

        Returns:
            A dict with ``request-id`` and ``file-id`` on success, or a
            ``(message dict, 500)`` tuple when any exception is raised.
        """
        try:
            from servicex.models import db
            file_info = request.get_json()
            parent_request = TransformRequest.return_request(request_id)

            new_file = DatasetFile(request_id=request_id,
                                   file_path=file_info['file_path'],
                                   adler32=file_info['adler32'],
                                   file_events=file_info['file_events'],
                                   file_size=file_info['file_size'])

            self.lookup_result_processor.add_file_to_dataset(parent_request, new_file)

            if self.elasticsearch_adaptor:
                self.elasticsearch_adaptor.create_update_path(
                    new_file.get_path_id(),
                    self._generate_file_status_record(new_file, "located"))
            db.session.commit()

            return {"request-id": str(request_id), "file-id": new_file.id}

        except Exception as error:
            # Print the stack entries and the error to stdout, then report
            # the failure (including the error text) to the caller.
            traceback.print_tb(error.__traceback__, limit=20, file=sys.stdout)
            print(error)
            return {'message': 'Something went wrong: ' + str(error)}, 500
    def put(self, request_id):
        """Record the outcome of transforming one file; finish the job if done.

        Saves a ``TransformationResult`` built from the JSON body, mirrors the
        file and request state to Elasticsearch when an adapter is set, and,
        once ``TransformRequest.files_remaining`` reports zero or fewer files,
        shuts down the transformer job and marks the request "Complete".

        Args:
            request_id: Identifier of the request the file belongs to.

        Returns:
            The string ``"Ok"``.
        """
        report = request.get_json()
        parent_request = TransformRequest.return_request(request_id)
        source_file = DatasetFile.get_by_id(report['file-id'])

        outcome = TransformationResult(
            did=parent_request.did,
            file_id=source_file.id,
            request_id=request_id,
            file_path=report['file-path'],
            transform_status=report['status'],
            transform_time=report['total-time'],
            total_bytes=report['total-bytes'],
            total_events=report['total-events'],
            avg_rate=report['avg-rate'],
            messages=report['num-messages']
        )
        outcome.save_to_db()

        if self.elasticsearch_adapter:
            self.elasticsearch_adapter.create_update_path(
                source_file.get_path_id(),
                self._generate_file_status_record(source_file, report['status']))

            self.elasticsearch_adapter.create_update_request(
                request_id,
                self._generate_transformation_record(parent_request, 'transforming'))

        remaining = TransformRequest.files_remaining(request_id)
        # A ``None`` count is treated as "not finished".
        all_files_done = remaining is not None and remaining <= 0
        if all_files_done:
            namespace = current_app.config['TRANSFORMER_NAMESPACE']
            print("Job is all done... shutting down transformers")
            self.transformer_manager.shutdown_transformer_job(request_id, namespace)
            parent_request.status = "Complete"
            parent_request.save_to_db()

            if self.elasticsearch_adapter:
                self.elasticsearch_adapter.create_update_request(
                    request_id,
                    self._generate_transformation_record(parent_request, 'complete'))

        print(report)
        db.session.commit()

        return "Ok"
    def test_submit_transformation_with_root_file(self, mocker,
                                                  mock_rabbit_adaptor,
                                                  mock_code_gen_service,
                                                  mock_docker_repo_adapter):
        """Submitting an xAOD root-file request stores it and notifies RabbitMQ.

        Checks the saved ``TransformRequest`` fields, the work and error
        queues set up on the RabbitMQ adaptor, and the DID lookup message
        published to ``did_requests``.
        """
        mock_code_gen_service.generate_code_for_selection = mocker.Mock(
            return_value='my-cm')
        payload = self._generate_transformation_request_xAOD_root_file()

        client = self._test_client(
            rabbit_adaptor=mock_rabbit_adaptor,
            code_gen_service=mock_code_gen_service,
            docker_repo_adapter=mock_docker_repo_adapter)
        response = client.post('/servicex/transformation', json=payload)

        assert response.status_code == 200

        request_id = response.json['request_id']

        with client.application.app_context():
            stored = TransformRequest.return_request(request_id)
            assert stored
            assert stored.did == '123-45-678'
            assert stored.request_id == request_id
            assert stored.columns is None
            assert stored.selection == 'test-string'
            assert stored.image == 'ssl-hep/func_adl:latest'
            assert stored.chunk_size is None
            assert stored.workers == 10
            assert stored.result_destination == 'object-store'
            assert stored.result_format == 'root-file'
            assert stored.generated_code_cm == 'my-cm'

        error_queue = request_id + "_errors"
        mock_rabbit_adaptor.setup_queue.assert_has_calls(
            [call(request_id), call(error_queue)])

        expected_bindings = [
            call(exchange="transformation_requests", queue=request_id),
            call(exchange="transformation_failures", queue=error_queue),
        ]
        assert mock_rabbit_adaptor.bind_queue_to_exchange.call_args_list == expected_bindings

        expected_did_request = {
            "request_id": request_id,
            "did": "123-45-678",
            "service-endpoint":
                "http://cern.analysis.ch:5000/servicex/internal/transformation/"
                + request_id,
        }
        mock_rabbit_adaptor.basic_publish.assert_called_with(
            exchange='',
            routing_key='did_requests',
            body=json.dumps(expected_did_request))
예제 #8
0
 def put(self, request_id):
     """Record the dataset lookup summary for a request and commit it.

     Reads ``files``, ``files-skipped``, ``total-events``, ``total-bytes``
     and ``elapsed-time`` from the JSON body and forwards them to
     ``self.lookup_result_processor.report_fileset_complete``.

     Args:
         request_id: Identifier of the request whose lookup has finished.
     """
     lookup_summary = request.get_json()
     transform_request = TransformRequest.return_request(request_id)

     report_complete = self.lookup_result_processor.report_fileset_complete
     report_complete(
         transform_request,
         num_files=lookup_summary['files'],
         num_skipped=lookup_summary['files-skipped'],
         total_events=lookup_summary['total-events'],
         total_bytes=lookup_summary['total-bytes'],
         did_lookup_time=lookup_summary['elapsed-time'])

     db.session.commit()
예제 #9
0
    def get(self, request_id):
        """Report processing progress for a single transformation request.

        Args:
            request_id: Identifier of the transformation request to inspect.

        Returns:
            The ``jsonify``-ed status dictionary (request id, processed,
            skipped and remaining file counts, statistics). Per-file details
            are added under ``details`` when the parsed status arguments carry
            a truthy ``details`` value.
        """
        status_request = status_request_parser.parse_args()

        count = TransformationResult.count(request_id)
        stats = TransformationResult.statistics(request_id)
        failures = TransformationResult.failed_files(request_id)
        # Query the remaining-file count once so the value written to stdout
        # and the value sent to the client cannot disagree.
        files_remaining = TransformRequest.files_remaining(request_id)
        print(count, stats)
        print(files_remaining)
        result_dict = {
            "request-id": request_id,
            "files-processed": count - failures,
            "files-skipped": failures,
            "files-remaining": files_remaining,
            "stats": stats
        }

        if status_request.details:
            result_dict['details'] = TransformationResult.to_json_list(
                TransformationResult.get_all_status(request_id))

        return jsonify(result_dict)
예제 #10
0
    def get(self, request_id=None):
        """Return one transformation request (or all) to an authenticated user.

        When a single request is returned, the object store is enabled, and
        the request's ``result-destination`` equals
        ``TransformRequest.OBJECT_STORE_DEST``, the MinIO endpoint, access key
        and secret key from the app config are added to the response.

        Args:
            request_id: Identifier of the request; when falsy every request is
                returned via ``TransformRequest.return_all()``.

        Returns:
            ``(message dict, 401)`` when authentication fails, otherwise the
            request record or the result of ``return_all()``.
        """
        is_auth, auth_reject_message = self._validate_user()
        if not is_auth:
            rejection = {
                'message': f'Authentication Failed: {str(auth_reject_message)}'
            }
            return rejection, 401

        if not request_id:
            return TransformRequest.return_all()

        request_rec = TransformRequest.to_json(
            TransformRequest.return_request(request_id))

        wants_object_store = (
            current_app.config['OBJECT_STORE_ENABLED']
            and request_rec['result-destination'] == TransformRequest.OBJECT_STORE_DEST)
        if wants_object_store:
            # (response key, config key) pairs copied into the response.
            minio_fields = (
                ('minio-endpoint', 'MINIO_PUBLIC_URL'),
                ('minio-access-key', 'MINIO_ACCESS_KEY'),
                ('minio-secret-key', 'MINIO_SECRET_KEY'),
            )
            for response_key, config_key in minio_fields:
                request_rec[response_key] = current_app.config[config_key]

        return request_rec
    def test_submit_transformation(self, mock_rabbit_adaptor,
                                   mock_docker_repo_adapter):
        """Submitting a kafka-destination request stores it and notifies RabbitMQ.

        Checks the saved ``TransformRequest`` fields, the work and error
        queues set up on the RabbitMQ adaptor, and the DID lookup message
        published to ``did_requests``.
        """
        client = self._test_client(
            rabbit_adaptor=mock_rabbit_adaptor,
            docker_repo_adapter=mock_docker_repo_adapter)
        payload = self._generate_transformation_request()
        response = client.post('/servicex/transformation', json=payload)

        assert response.status_code == 200

        request_id = response.json['request_id']

        with client.application.app_context():
            stored = TransformRequest.return_request(request_id)
            assert stored
            assert stored.did == '123-45-678'
            assert stored.request_id == request_id
            assert stored.columns == "e.e, e.p"
            assert stored.image == 'ssl-hep/foo:latest'
            assert stored.chunk_size == 500
            assert stored.workers == 10
            assert stored.result_destination == 'kafka'
            assert stored.kafka_broker == "ssl.hep.kafka:12332"

        error_queue = request_id + "_errors"
        mock_rabbit_adaptor.setup_queue.assert_has_calls(
            [call(request_id), call(error_queue)])

        expected_bindings = [
            call(exchange="transformation_requests", queue=request_id),
            call(exchange="transformation_failures", queue=error_queue),
        ]
        assert mock_rabbit_adaptor.bind_queue_to_exchange.call_args_list == expected_bindings

        expected_did_request = {
            "request_id": request_id,
            "did": "123-45-678",
            "service-endpoint":
                "http://cern.analysis.ch:5000/servicex/internal/transformation/"
                + request_id,
        }
        mock_rabbit_adaptor.basic_publish.assert_called_with(
            exchange='',
            routing_key='did_requests',
            body=json.dumps(expected_did_request))
예제 #12
0
    def get(self, request_id):
        """List the per-file failures recorded for a transformation request.

        Args:
            request_id: Identifier of the transformation request to inspect.

        Returns:
            ``(message dict, 401)`` when ``self._validate_user()`` rejects the
            caller, ``("Transform Not Found", 404)`` when no request record is
            found, otherwise ``{"errors": [...]}`` with one entry per row
            returned by ``FileStatus.failures_for_request``.
        """
        is_auth, auth_reject_message = self._validate_user()
        if not is_auth:
            return {
                'message': f'Authentication Failed: {str(auth_reject_message)}'
            }, 401

        submitted_request = TransformRequest.return_request(request_id)
        if not submitted_request:
            # Integer status, matching the 401 above, instead of the bare
            # status string "404".
            return "Transform Not Found", 404

        # Rows are indexed positionally: element 0 supplies the file path and
        # event count, element 1 supplies the pod name and info text.
        errors = [{
            "pod-name": failure[1].pod_name,
            "file": failure[0].file_path,
            "events": failure[0].file_events,
            "info": failure[1].info
        } for failure in FileStatus.failures_for_request(request_id)]

        # The comprehension already built a fresh list; no extra copy needed.
        return {"errors": errors}
예제 #13
0
    def post(self, request_id):
        """Publish a preflight check request for one file of a request.

        Args:
            request_id: Identifier of the transformation request.

        Returns:
            A dict with ``request-id`` and a fixed ``file-id`` of 42 on
            success, or a ``(message dict, 500)`` tuple when publishing
            raises.
        """
        payload = request.get_json()
        parent_request = TransformRequest.return_request(request_id)

        try:
            self.lookup_result_processor.publish_preflight_request(
                parent_request, payload['file_path'])

            return {"request-id": str(request_id), "file-id": 42}

        except Exception as error:
            # Print the stack entries and the error to stdout; the caller only
            # receives a generic message.
            traceback.print_tb(error.__traceback__, limit=20, file=sys.stdout)
            print(error)
            return {'message': 'Something went wrong'}, 500
    def test_submit_transformation_with_object_store(self, mocker,
                                                     mock_rabbit_adaptor,
                                                     mock_docker_repo_adapter):
        """An object-store request creates a bucket named after the request id.

        Also checks that the saved request keeps the ``object-store``
        destination and ``parquet`` result format.
        """
        from servicex import ObjectStoreManager

        object_store_config = {
            'OBJECT_STORE_ENABLED': True,
            'MINIO_URL': 'localhost:9000',
            'MINIO_ACCESS_KEY': 'miniouser',
            'MINIO_SECRET_KEY': 'leftfoot1'
        }
        payload = {
            'did': '123-45-678',
            'columns': "e.e, e.p",
            'image': 'ssl-hep/foo:latest',
            'result-destination': 'object-store',
            'result-format': 'parquet',
            'chunk-size': 500,
            'workers': 10
        }

        mock_object_store = mocker.MagicMock(ObjectStoreManager)
        client = self._test_client(
            additional_config=object_store_config,
            rabbit_adaptor=mock_rabbit_adaptor,
            object_store=mock_object_store,
            docker_repo_adapter=mock_docker_repo_adapter)

        response = client.post('/servicex/transformation', json=payload)
        assert response.status_code == 200

        request_id = response.json['request_id']
        mock_object_store.create_bucket.assert_called_with(request_id)

        with client.application.app_context():
            stored = TransformRequest.return_request(request_id)
            assert stored
            assert stored.result_destination == 'object-store'
            assert stored.result_format == 'parquet'
예제 #15
0
    def post(self, request_id):
        status = status_parser.parse_args()
        status.request_id = request_id
        if status.severity == "fatal":
            print("+--------------------------------------------+")
            print(r"""
  ______   _______       _        ______ _____  _____   ____  _____
 |  ____/\|__   __|/\   | |      |  ____|  __ \|  __ \ / __ \|  __ \
 | |__ /  \  | |  /  \  | |      | |__  | |__) | |__) | |  | | |__) |
 |  __/ /\ \ | | / /\ \ | |      |  __| |  _  /|  _  /| |  | |  _  /
 | | / ____ \| |/ ____ \| |____  | |____| | \ \| | \ \| |__| | | \ \
 |_|/_/    \_\_/_/    \_\______| |______|_|  \_\_|  \_\\____/|_|  \_\
            """)
            print(f"+ Fatal error reported for {request_id} from {status.source}")
            print(status.info)
            print("+--------------------------------------------+")

            submitted_request = TransformRequest.return_request(request_id)
            submitted_request.status = 'Fatal'
            submitted_request.failure_description = status.info
            submitted_request.save_to_db()
            db.session.commit()
        else:
            print(status)
예제 #16
0
 def _generate_transform_request():
     """Build a populated ``TransformRequest`` fixture with id ``'BR549'``."""
     # Attribute values, applied to the new record in this order.
     field_values = {
         'submit_time': 1000,
         'request_id': 'BR549',
         'columns': 'electron.eta(), muon.pt()',
         'tree_name': 'Events',
         'chunk_size': 1000,
         'workers': 42,
         'did': '123-456-789',
         'image': 'ssl-hep/foo:latest',
         'result_destination': 'kafka',
         'result_format': 'arrow',
         'kafka_broker': 'http://ssl-hep.org.kafka:12345',
         'total_events': 10000,
         'total_bytes': 1203,
         'status': "Submitted",
     }
     fixture = TransformRequest()
     for field_name, field_value in field_values.items():
         setattr(fixture, field_name, field_value)
     return fixture
    def post(self):
        """Accept a new transformation request and start locating its files.

        Parses the request with ``parser``, allocates a UUID request id,
        creates an object-store bucket when an object store is set and the
        destination is ``object-store``, sets up the RabbitMQ work and error
        queues, saves the request record, and then either publishes a DID
        lookup message or registers the explicitly listed files.

        Returns:
            ``{"request_id": ...}`` on success; a ``(message dict, 400)``
            tuple for ``BadRequest`` or ``ValueError``; a
            ``(message dict, 500)`` tuple for any other exception.
        """
        try:
            transformation_request = parser.parse_args()
            print("object store ", self.object_store)

            request_id = str(uuid.uuid4())
            time = datetime.now(tz=timezone.utc)

            requested_did = transformation_request['did'] \
                if 'did' in transformation_request else None
            requested_file_list = transformation_request['file-list'] \
                if 'file-list' in transformation_request else None

            # requested_did xor requested_file_list: exactly one of the two
            # must be truthy, so equal truthiness means both or neither.
            if bool(requested_did) == bool(requested_file_list):
                raise BadRequest("Must provide did or file-list but not both")

            if self.object_store and \
                    transformation_request['result-destination'] == 'object-store':
                self.object_store.create_bucket(request_id)
                # NOTE(review): what happens when the destination is
                # object-store but self.object_store is None? No bucket is
                # created and the request still proceeds.

            # The broker is only read from the request for kafka destinations.
            if transformation_request['result-destination'] == 'kafka':
                broker = transformation_request['kafka']['broker']
            else:
                broker = None

            request_rec = TransformRequest(
                did=requested_did
                if requested_did else "File List Provided in Request",
                submit_time=time,
                columns=transformation_request['columns'],
                selection=transformation_request['selection'],
                tree_name=transformation_request['tree-name'],
                request_id=str(request_id),
                image=transformation_request['image'],
                chunk_size=transformation_request['chunk-size'],
                result_destination=transformation_request[
                    'result-destination'],
                result_format=transformation_request['result-format'],
                kafka_broker=broker,
                workers=transformation_request['workers'],
                workflow_name=_workflow_name(transformation_request))

            # If we are doing the xaod_cpp workflow, then the first thing to do is make
            # sure the requested selection is correct, and generate the C++ files
            if request_rec.workflow_name == 'selection_codegen':
                namespace = current_app.config['TRANSFORMER_NAMESPACE']
                request_rec.generated_code_cm = \
                    self.code_gen_service.generate_code_for_selection(request_rec, namespace)

            # Create queue for transformers to read from
            self.rabbitmq_adaptor.setup_queue(request_id)

            self.rabbitmq_adaptor.bind_queue_to_exchange(
                exchange="transformation_requests", queue=request_id)

            # Also setup an error queue for dead letters generated by transformer
            self.rabbitmq_adaptor.setup_queue(request_id + "_errors")

            self.rabbitmq_adaptor.bind_queue_to_exchange(
                exchange="transformation_failures",
                queue=request_id + "_errors")

            request_rec.save_to_db()

            if requested_did:
                # Dataset identifier supplied: publish a lookup message to the
                # 'did_requests' routing key.
                did_request = {
                    "request_id":
                    request_rec.request_id,
                    "did":
                    request_rec.did,
                    "service-endpoint":
                    self._generate_advertised_endpoint(
                        "servicex/transformation/" + request_rec.request_id)
                }

                self.rabbitmq_adaptor.basic_publish(
                    exchange='',
                    routing_key='did_requests',
                    body=json.dumps(did_request))
            else:
                # Request a preflight check on the first file
                self.lookup_result_processor.publish_preflight_request(
                    request_rec, requested_file_list[0])

                # Files given explicitly are registered with placeholder
                # checksum, event-count and size values.
                for file_path in requested_file_list:
                    file_record = DatasetFile(request_id=request_id,
                                              file_path=file_path,
                                              adler32="xxx",
                                              file_events=0,
                                              file_size=0)
                    self.lookup_result_processor.add_file_to_dataset(
                        request_rec, file_record)

                self.lookup_result_processor.report_fileset_complete(
                    request_rec, num_files=len(requested_file_list))

            db.session.commit()

            if self.elasticsearch_adapter:
                self.elasticsearch_adapter.create_update_request(
                    request_id,
                    self._generate_transformation_record(
                        request_rec, "locating DID"))

            return {"request_id": str(request_id)}
        except BadRequest as bad_request:
            return {
                'message':
                f'The json request was malformed: {str(bad_request)}'
            }, 400
        except ValueError as eek:
            return {
                'message': f'Failed to submit transform request: {str(eek)}'
            }, 400
        except Exception:
            # Any other failure: print the stack entries and the error to
            # stdout; the caller only receives a generic message.
            exc_type, exc_value, exc_traceback = sys.exc_info()
            traceback.print_tb(exc_traceback, limit=20, file=sys.stdout)
            print(exc_value)
            return {'message': 'Something went wrong'}, 500
 def _generate_test_request(self):
     """Build a minimal ``TransformRequest`` fixture with an id and selection."""
     fixture = TransformRequest()
     fixture.request_id = "462-33"
     fixture.selection = "test-string"
     return fixture