# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. import enum from firebase_admin.db import reference as realtime from firebase_admin.firestore import client as cfs from google.cloud import firestore from aet.logger import get_logger from app import config, utils LOG = get_logger('HELPERS') class MessageHandlingException(Exception): # A simple way to handle the variety of expected misbehaviors in message sync # Between Aether and Firebase pass class SyncMode(enum.Enum): SYNC = 1 # Firebase <-> Aether FORWARD = 2 # Firebase -> Aether CONSUME = 3 # Firebase <- Aether NONE = 4 # Firebase | Aether
from dataclasses import dataclass
from typing import Dict

from confluent_kafka import Producer as KafkaProducer

from aet.jsonpath import CachedParser
from aet.resource import ResourceDefinition
from aet.logger import get_logger
from aet.kafka import KafkaConsumer, FilterConfig, MaskConfig

from app.config import get_kafka_config, get_kafka_admin_config
from app.fixtures.schemas import ERROR_LOG_AVRO

from . import TransformationError
from .event import Event, KafkaMessage, TestEvent, ZeebeJob
from .kafka import TopicHelper
from .zb import ZeebeConnection

LOG = get_logger('pipe')
KAFKA_CONFIG = get_kafka_config()


@dataclass
class Transition:
    input_map: Dict
    output_map: Dict = None
    pass_condition: str = None
    fail_condition: str = None

    @staticmethod
    def handle_parser_results(matches):
        if matches:
            if len(matches) > 1:
                return [i.value for i in matches]

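# Usage sketch (hypothetical document and path, not from the source). Assumes
# CachedParser.find(path, obj) returns jsonpath match objects exposing .value,
# which is what handle_parser_results above consumes:
#   matches = CachedParser.find('$.items[*].id', {'items': [{'id': 1}, {'id': 2}]})
#   Transition.handle_parser_results(matches)  # multiple matches -> [1, 2]
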
from io import BytesIO
import json
import re
from typing import Callable, Dict, List, Union

from spavro import schema
from spavro.datafile import DataFileWriter
from spavro.io import (DatumWriter, validate as validate_schema)

from aet.kafka_utils import create_topic, get_admin_client, get_broker_info
from aet.logger import get_logger

from app.config import get_kafka_admin_config

LOG = get_logger('KAFKA')
ADMIN_CONFIG = get_kafka_admin_config()

RE_VALID_KAFKA_TOPIC = re.compile(r'[a-z0-9.-]')


class TopicHelper(object):

    @staticmethod
    def parse(schema_definition) -> schema.Schema:
        return schema.parse(json.dumps(schema_definition))

    @staticmethod
    def valid_topic(name) -> bool:
        return RE_VALID_KAFKA_TOPIC.match(name) is not None and len(name) < 255

    def __init__(self, schema_definition, tenant, topic):
        self.schema = TopicHelper.parse(schema_definition)

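# Usage sketch (hypothetical schema and topic name, not from the source):
#   _definition = {
#       'name': 'Example',
#       'type': 'record',
#       'fields': [{'name': 'id', 'type': 'string'}],
#   }
#   TopicHelper.parse(_definition)        # -> spavro schema.Schema instance
#   TopicHelper.valid_topic('a.topic-1')  # -> True: charset matches, len < 255
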
from dataclasses import dataclass
from typing import (Any, Dict, List, Tuple, Mapping)

from firebase_admin.db import reference as rtdb_reference
from firebase_admin.firestore import client as cfs_client
from google.cloud import firestore

import spavro.schema
import spavro.io

from aether.python.avro import tools as avro_tools
from aet.logger import get_logger

from .schema_utils import (add_id_field, coersce_or_fail, contains_id)
from .config import get_function_config

CONF = get_function_config()
LOG = get_logger('Utils')

_BASE_PATH = CONF.get('BASE_PATH')
_SYNC = CONF.get('SYNC_PATH') or '_sync_queue'
_SYNC_QUEUE = f'{_BASE_PATH}/{_SYNC}'
_NORMAL_CACHE = f'{_BASE_PATH}/_cached'
_QUARANTINE_CACHE = f'{_BASE_PATH}/_quarantined'


@dataclass
class InputSet:
    name: str
    docs: List[Tuple[str, Any]]  # (_id, doc)
    options: Dict
    schema: Dict

from typing import (
    Any,
    Callable,
    Dict,
    Iterable,
    List,
    Tuple,
    Union
)

import requests

from aet.resource import ResourceDefinition
from aet.logger import get_logger

from . import TransformationError, type_checker

LOG = get_logger('JS')


class JSHelper(object):

    @staticmethod
    def get_file(url: str) -> str:
        res = requests.get(url)
        res.raise_for_status()
        return res.text

    def __init__(self, definition: ResourceDefinition):
        self._function = None
        self.definition = definition
        try:
            self._setup()

from aet.consumer import BaseConsumer
from aet.logger import get_logger

from app import artifacts

LOG = get_logger('MAIN')


class StreamConsumer(BaseConsumer):

    def __init__(self, CON_CONF=None, KAFKA_CONF=None, redis_instance=None):
        self.job_class = artifacts.Job
        super(StreamConsumer, self).__init__(
            CON_CONF,
            KAFKA_CONF,
            self.job_class,
            redis_instance=redis_instance)

# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. from enum import Enum import json import firebase_admin from aet.logger import get_logger from .config import get_function_config from .hash import make_hash from . import fb_utils LOG = get_logger('mv') CONF = get_function_config() RTDB = None CFS = None APP = None class DBType(Enum): RTDB = 0 CFS = 1 class Mode(Enum): SYNC = 0 PUSH = 1
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

import json
from typing import Any, Callable, Dict, List, Mapping

from aet.logger import get_logger
from aether.python.avro.schema import Node

from . import config
from . import index_handler
from .processor import ES_RESERVED

consumer_config = config.get_consumer_config()
LOG = get_logger('VIS')

AVRO_TYPES = [a_type for a_type, es_type in config.AVRO_TYPES]
AETHER_TYPES = [a_type for a_type, es_type in config.AETHER_TYPES]


def format_vis(fn):
    def do_format(*args, **kwargs):
        title = kwargs['title']
        alias = kwargs['alias']
        node = kwargs['node']
        field_name = kwargs['field_name']
        sub = kwargs['subscription']
        label = node.doc if hasattr(node, 'doc') else node.name
        vis_state = fn(title, alias, label, field_name, node, sub)
        source_search = {

import json
import pytest
import requests
import responses
from time import sleep

from elasticsearch.exceptions import NotFoundError
from aet.logger import get_logger

from app import index_handler

from . import *  # noqa  # fixtures

LOG = get_logger('TEST-IDX')


# convenience function for jsonpath
@responses.activate
@pytest.mark.unit
def test__handle_http():
    responses.add(
        responses.GET,
        'http://bad-url',
        json={'error': 'not found'},
        status=404)
    res = requests.get('http://bad-url')
    with pytest.raises(requests.exceptions.HTTPError):
        index_handler.handle_http(res)

# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

import pytest

from aet.logger import get_logger

from app import processor

from . import *  # noqa  # fixtures
from . import (TYPE_INSTRUCTIONS)

LOG = get_logger('TEST-PRO')


@pytest.mark.parametrize('_type,test_value,expected', [
    ('date', 1, '1970-01-02'),
])
@pytest.mark.unit
def test__required_avro_logical_coersions(_type, test_value, expected):
    _fn = processor.AVRO_LOGICAL_COERCE[_type]
    res = _fn(test_value)
    assert (res == expected)


@pytest.mark.unit
def test__end_to_end(ComplexSchema):
    _name = 'test'

from typing import Any, Dict, List, Tuple

from aet.logger import get_logger
from aet.kafka_utils import (
    create_topic,
    get_producer,
    get_admin_client,
    get_broker_info,
    produce
)

from .config import (get_function_config, get_kafka_admin_config)
from . import fb_utils

CONF = get_function_config()
MAX_KAFKA_MESSAGE_SIZE = int(
    CONF.get('MAX_KAFKA_MESSAGE_SIZE', 100_000))  # keep things reasonably sized, MAX is 2mb

KAFKA_SECURITY = get_kafka_admin_config()
KADMIN = get_admin_client(KAFKA_SECURITY)
PRODUCER = get_producer(KAFKA_SECURITY)

_logger = get_logger('KAFKA')


def publish(
    objs: List[Tuple[str, Any]],
    schema: Dict,
    _type: str,
    rtdb=None,
    max_size=MAX_KAFKA_MESSAGE_SIZE
):
    _prepare_kafka(_type)
    # have to split out _publish because it can be called on failure and
    # we only want to try to create the topic once
    res = _publish_kafka(
        [i for (_id, i) in objs],  # strip out _ids, must be in the doc at this point
        schema,
        _type,

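# Call-shape sketch (hypothetical doc and schema, not from the source); objs
# are (_id, doc) pairs and the _ids are stripped before anything hits Kafka:
#   publish(
#       [('doc-1', {'id': 'doc-1', 'value': 1})],            # objs
#       {'name': 'Doc', 'type': 'record', 'fields': [...]},  # Avro schema dict
#       'example-type')                                      # topic / type name
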
from typing import Dict

from aet.exceptions import ConsumerHttpException
from aet.logger import get_logger
from aet.resource import BaseResource, Draft7Validator, ValidationError
from aet.jsonpath import CachedParser

from app.fixtures import schemas
from app.helpers import check_required, TransformationError
from app.helpers.js import JSHelper
from app.helpers.kafka import TopicHelper
from app.helpers.rest import RestHelper
from app.helpers.event import (TestEvent, ZeebeJob)
from app.helpers.pipeline import (PipelineContext, Transition)
from app.helpers.zb import ZeebeConnection

LOG = get_logger('transformers')


class Transformation(BaseResource):
    schema = schemas.BASIC
    name = '__transformation'  # should not be directly created...
    jobs_path = None
    public_actions = BaseResource.public_actions + ['test']

    def run(self, context: PipelineContext, transition: Transition) -> Dict:
        local_context = transition.prepare_input(context.data, self.definition)
        try:
            result = self.do_work(local_context)
            output = transition.prepare_output(result, self.definition)
            transition.check_failure(output)

# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

from typing import List

import firebase_admin
from aet.logger import get_logger

from .config import get_function_config
from . import fb_utils
from .fb_utils import InputSet, InputManager
from . import kafka_utils

_logger = get_logger('EXPORT')
CONF = get_function_config()
RTDB: fb_utils.RTDBTarget = None


class ExportManager():

    def __init__(self, rtdb=None):
        # set destination
        self._connect_firebase(rtdb)
        self.manager = InputManager(self.rtdb)

    def _connect_firebase(self, rtdb=None):
        if not rtdb:
            self._init_global_firebase()
            global RTDB

import firebase_admin
from google.cloud import firestore

from aet.exceptions import ConsumerHttpException
from aet.job import BaseJob, JobStatus
from aet.kafka import KafkaConsumer, FilterConfig, MaskConfig
from aet.logger import callback_logger, get_logger
from aet.resource import BaseResource, lock
from werkzeug.local import LocalProxy

# Aether python lib
# from aether.python.avro.schema import Node

from app.config import get_consumer_config, get_kafka_config
from app.fixtures import schemas
from app import helpers

LOG = get_logger('artifacts')
CONSUMER_CONFIG = get_consumer_config()
KAFKA_CONFIG = get_kafka_config()


class FirebaseInstance(BaseResource):
    schema = schemas.FB_INSTANCE
    jobs_path = '$.firebase'
    name = 'firebase'
    public_actions = BaseResource.public_actions + [
        'test_connection'
    ]

    app: firebase_admin.App = None
    cfs: firestore.Client = None
    rtdb: helpers.RTDB = None

import json
from typing import Any, Mapping

from requests import Session
from requests.exceptions import HTTPError

from aet.exceptions import ConsumerHttpException
from aet.logger import get_logger
from aether.python.avro.schema import Node

from . import config
from .processor import ES_RESERVED
from .visualization import (
    auto_visualizations,
    schema_defined_visualizations
)
from . import utils

LOG = get_logger('INDEX')
consumer_config = config.get_consumer_config()
kafka_config = config.get_kafka_config()


# Kibana Change Handler

def kibana_handle_schema_change(
    tenant: str,
    alias_name: str,
    schema_old: Mapping[Any, Any],
    schema_new: Mapping[Any, Any],
    subscription: Mapping[str, Any],  # Subscription.definition
    es_index: Mapping[Any, Any],
    es_conn,
    kibana_conn
):

from uuid import uuid4

from aet.logger import get_logger
from aet.resource import ResourceDefinition

from aether.python.avro import generation
from aether.python.avro.schema import Node

from app import config
from app.fixtures import examples
from app.processor import ESItemProcessor
from app.artifacts import Subscription, ESJob, LocalESInstance
from app import consumer

CONSUMER_CONFIG = config.consumer_config
KAFKA_CONFIG = config.get_kafka_config()

LOG = get_logger('FIXTURE')

# Some of the fixtures are non-compliant so we don't QA this file.
# flake8: noqa

URL = 'http://localhost:9013'

# pick a random tenant for each run so we don't need to wipe ES.
TS = str(uuid4()).replace('-', '')[:8]
TENANT = f'TEN{TS}'
TEST_TOPIC = 'es_test_topic'

# instances of samples pushed to Kafka
GENERATED_SAMPLES = {}

import os

from aet.exceptions import MessageHandlingException
from aet.job import BaseJob, JobManager, JobStatus
from aet.jsonpath import CachedParser  # noqa
from aet.kafka import KafkaConsumer
from aet.logger import get_logger
from aet.resource import (  # noqa
    BaseResource,
    BASE_PUBLIC_ACTIONS,
    lock,
    MethodDesc
)

from .assets.schemas import test_schemas

LOG = get_logger('Test')

here = os.path.dirname(os.path.realpath(__file__))

kafka_server = 'kafka-test:29092'
kafka_connection_retry = 10
kafka_connection_retry_wait = 6
# increasing topic_size may cause poll to be unable to get all the messages in one call.
# needs to be even and, if > 100, a multiple of 100.
topic_size = 500

TestResourceDef1 = {'id': '1', 'username': '******', 'password': '******'}


class BadResource(BaseResource):

# under the License.

import pytest
import requests
import json
from time import sleep

from . import *  # noqa
from . import (  # noqa  # for the linter
    ElasticsearchConsumer,
    RequestClientT1,
    RequestClientT2,
    URL,
    check_local_es_readyness
)

from aet.logger import get_logger
from app.fixtures import examples

LOG = get_logger('TEST')

'''
    API Tests
'''


@pytest.mark.unit
def test__consumer_add_delete_respect_tenants(ElasticsearchConsumer, RequestClientT1, RequestClientT2):
    res = RequestClientT1.post(f'{URL}/elasticsearch/add', json=examples.ES_INSTANCE)
    assert (res.json() is True)
    res = RequestClientT1.get(f'{URL}/elasticsearch/list')
    assert (res.json() != [])
    res = RequestClientT2.get(f'{URL}/elasticsearch/list')

# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. from datetime import datetime, timedelta import json from aet.logger import get_logger from aet.jsonpath import CachedParser from aether.python.utils import replace_nested from aether.python.avro.schema import Node from .config import get_consumer_config, AVRO_TYPES LOG = get_logger('PROCESS') CONSUMER_CONFIG = get_consumer_config() ES_RESERVED = [ '_uid', '_id', '_type', '_source', '_all', '_field_names', '_routing', '_index', '_size', '_timestamp', '_ttl', '_version', '_submitted_at', '_surveyor' ] AVRO_BASE_COERCE = { # avro_type -> handler } AVRO_LOGICAL_COERCE = { # logical_avro_type -> handler # int(days since epoch) -> iso_string