Example #1
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import enum

from firebase_admin.db import reference as realtime
from firebase_admin.firestore import client as cfs

from google.cloud import firestore
from aet.logger import get_logger

from app import config, utils

LOG = get_logger('HELPERS')


class MessageHandlingException(Exception):
    # A simple way to handle the variety of expected misbehaviors in message sync
    # between Aether and Firebase
    pass


class SyncMode(enum.Enum):
    SYNC = 1  # Firebase  <->  Aether
    FORWARD = 2  # Firebase   ->  Aether
    CONSUME = 3  # Firebase  <-   Aether
    NONE = 4  # Firebase   |   Aether
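
# A minimal sketch (hypothetical helpers, not part of the snippet above) of how
# SyncMode could gate message flow between the two stores:
def should_forward_to_aether(mode: SyncMode) -> bool:
    # Firebase -> Aether traffic is allowed in SYNC and FORWARD modes only
    return mode in (SyncMode.SYNC, SyncMode.FORWARD)


def should_consume_from_aether(mode: SyncMode) -> bool:
    # Aether -> Firebase traffic is allowed in SYNC and CONSUME modes only
    return mode in (SyncMode.SYNC, SyncMode.CONSUME)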

Example #2
from dataclasses import dataclass
from typing import Dict

from confluent_kafka import Producer as KafkaProducer

from aet.jsonpath import CachedParser
from aet.resource import ResourceDefinition
from aet.logger import get_logger
from aet.kafka import KafkaConsumer, FilterConfig, MaskConfig

from app.config import get_kafka_config, get_kafka_admin_config
from app.fixtures.schemas import ERROR_LOG_AVRO
from . import TransformationError
from .event import Event, KafkaMessage, TestEvent, ZeebeJob
from .kafka import TopicHelper
from .zb import ZeebeConnection

LOG = get_logger('pipe')
KAFKA_CONFIG = get_kafka_config()


@dataclass
class Transition:
    input_map: Dict
    output_map: Dict = None
    pass_condition: str = None
    fail_condition: str = None

    @staticmethod
    def handle_parser_results(matches):
        if matches:
            if len(matches) > 1:
                return [i.value for i in matches]
Example #3

from io import BytesIO
import json
import re
from typing import Callable, Dict, List, Union

from spavro import schema
from spavro.datafile import DataFileWriter
from spavro.io import (DatumWriter, validate as validate_schema)

from aet.kafka_utils import create_topic, get_admin_client, get_broker_info
from aet.logger import get_logger

from app.config import get_kafka_admin_config

LOG = get_logger('KAFKA')
ADMIN_CONFIG = get_kafka_admin_config()
RE_VALID_KAFKA_TOPIC = re.compile(r'[a-z0-9.-]')


class TopicHelper(object):
    @staticmethod
    def parse(schema_definition) -> schema.Schema:
        return schema.parse(json.dumps(schema_definition))

    @staticmethod
    def valid_topic(name) -> bool:
        return RE_VALID_KAFKA_TOPIC.match(name) is not None and len(name) < 255

    def __init__(self, schema_definition, tenant, topic):
        self.schema = TopicHelper.parse(schema_definition)
Example #4

from dataclasses import dataclass
from typing import (Any, Dict, List, Tuple, Mapping)

from firebase_admin.db import reference as rtdb_reference
from firebase_admin.firestore import client as cfs_client
from google.cloud import firestore
import spavro.schema
import spavro.io

from aether.python.avro import tools as avro_tools
from aet.logger import get_logger

from .schema_utils import (add_id_field, coersce_or_fail, contains_id)
from .config import get_function_config

CONF = get_function_config()
LOG = get_logger('Utils')

_BASE_PATH = CONF.get('BASE_PATH')
_SYNC = CONF.get('SYNC_PATH') or '_sync_queue'
_SYNC_QUEUE = f'{_BASE_PATH}/{_SYNC}'
_NORMAL_CACHE = f'{_BASE_PATH}/_cached'
_QUARANTINE_CACHE = f'{_BASE_PATH}/_quarantined'


@dataclass
class InputSet:
    name: str
    docs: List[Tuple[str, Any]]  # (_id, doc)
    options: Dict
    schema: Dict
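
# For illustration only (values are made up): how an InputSet might be populated.
# `docs` pairs each document _id with the document body, per the comment above.
_example_input_set = InputSet(
    name='patients',
    docs=[('doc-001', {'id': 'doc-001', 'name': 'Ada'})],
    options={'sync_mode': 'forward'},
    schema={'name': 'Patient', 'type': 'record', 'fields': []},
)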
Example #5
import requests

from typing import (
    Any,
    Callable,
    Dict,
    Iterable,
    List,
    Tuple,
    Union
)

from aet.resource import ResourceDefinition
from aet.logger import get_logger

from . import TransformationError, type_checker


LOG = get_logger('JS')


class JSHelper(object):

    @staticmethod
    def get_file(url: str) -> str:
        res = requests.get(url)
        res.raise_for_status()
        return res.text

    def __init__(self, definition: ResourceDefinition):
        self._function = None
        self.definition = definition
        try:
            self._setup()
Example #6

from aet.consumer import BaseConsumer
from aet.logger import get_logger

from app import artifacts

LOG = get_logger('MAIN')


class StreamConsumer(BaseConsumer):
    def __init__(self, CON_CONF=None, KAFKA_CONF=None, redis_instance=None):
        self.job_class = artifacts.Job
        super(StreamConsumer, self).__init__(CON_CONF,
                                             KAFKA_CONF,
                                             self.job_class,
                                             redis_instance=redis_instance)
Example #7
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

from enum import Enum
import json

import firebase_admin

from aet.logger import get_logger
from .config import get_function_config
from .hash import make_hash
from . import fb_utils

LOG = get_logger('mv')
CONF = get_function_config()
RTDB = None
CFS = None
APP = None


class DBType(Enum):
    RTDB = 0
    CFS = 1


class Mode(Enum):
    SYNC = 0
    PUSH = 1
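
# A minimal sketch (not the module's actual init code) of how the RTDB / CFS / APP
# globals above could be populated lazily with the public firebase_admin API.
# The 'RTDB_URL' config key is an assumption made for illustration; credentials
# are taken from the default application credentials.
from firebase_admin import db as _fb_db, firestore as _fb_firestore


def _init_clients():
    global APP, RTDB, CFS
    if APP is None:
        APP = firebase_admin.initialize_app(
            options={'databaseURL': CONF.get('RTDB_URL')})
        RTDB = _fb_db.reference('/', app=APP)
        CFS = _fb_firestore.client(app=APP)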
Example #8
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import json
from typing import Any, Callable, Dict, List, Mapping

from aet.logger import get_logger
from aether.python.avro.schema import Node

from . import config
from . import index_handler
from .processor import ES_RESERVED

consumer_config = config.get_consumer_config()
LOG = get_logger('VIS')

AVRO_TYPES = [a_type for a_type, es_type in config.AVRO_TYPES]
AETHER_TYPES = [a_type for a_type, es_type in config.AETHER_TYPES]


def format_vis(fn):
    def do_format(*args, **kwargs):
        title = kwargs['title']
        alias = kwargs['alias']
        node = kwargs['node']
        field_name = kwargs['field_name']
        sub = kwargs['subscription']
        label = node.doc if hasattr(node, 'doc') else node.name
        vis_state = fn(title, alias, label, field_name, node, sub)
        source_search = {
Example #9

import json
import pytest
import requests
import responses
from time import sleep

from elasticsearch.exceptions import NotFoundError

from aet.logger import get_logger

from app import index_handler

from . import *  # noqa  # fixtures

LOG = get_logger('TEST-IDX')

# convenience function for jsonpath


@responses.activate
@pytest.mark.unit
def test__handle_http():
    responses.add(responses.GET,
                  'http://bad-url',
                  json={'error': 'not found'},
                  status=404)
    res = requests.get('http://bad-url')
    with pytest.raises(requests.exceptions.HTTPError):
        index_handler.handle_http(res)
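
# The test above only checks that index_handler.handle_http raises HTTPError on an
# error response. A minimal, hypothetical implementation consistent with that
# behaviour (not necessarily the project's own) is simply:
def _handle_http_sketch(res: requests.Response) -> requests.Response:
    res.raise_for_status()  # raises requests.exceptions.HTTPError on 4xx/5xx
    return res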
Example #10
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import pytest

from aet.logger import get_logger

from app import processor

from . import *  # noqa  # fixtures
from . import (TYPE_INSTRUCTIONS)

LOG = get_logger('TEST-PRO')


@pytest.mark.parametrize('_type,test_value,expected', [
    ('date', 1, '1970-01-02'),
])
@pytest.mark.unit
def test__required_avro_logical_coersions(_type, test_value, expected):
    _fn = processor.AVRO_LOGICAL_COERCE[_type]
    res = _fn(test_value)
    assert (res == expected)
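
# A hedged sketch of the kind of handler the test above exercises: Avro's 'date'
# logical type stores days since the Unix epoch, so 1 -> '1970-01-02'.
# (Illustrative only; the real mapping lives in processor.AVRO_LOGICAL_COERCE.)
from datetime import date, timedelta


def _days_to_iso(days: int) -> str:
    return (date(1970, 1, 1) + timedelta(days=days)).isoformat()


assert _days_to_iso(1) == '1970-01-02'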


@pytest.mark.unit
def test__end_to_end(ComplexSchema):
    _name = 'test'
Example #11
from typing import Any, Dict, List, Tuple

from aet.logger import get_logger
from aet.kafka_utils import (create_topic, get_producer, get_admin_client,
                             get_broker_info, produce)

from .config import (get_function_config, get_kafka_admin_config)
from . import fb_utils

CONF = get_function_config()
MAX_KAFKA_MESSAGE_SIZE = int(
    CONF.get('MAX_KAFKA_MESSAGE_SIZE',
             100_000))  # keep things reasonably sized, MAX is 2mb

KAFKA_SECURITY = get_kafka_admin_config()
KADMIN = get_admin_client(KAFKA_SECURITY)
PRODUCER = get_producer(KAFKA_SECURITY)
_logger = get_logger('KAFKA')


def publish(objs: List[Tuple[str, Any]],
            schema: Dict,
            _type: str,
            rtdb=None,
            max_size=MAX_KAFKA_MESSAGE_SIZE):
    _prepare_kafka(_type)
    # have to split out _publish because it can be called on failure and
    # we only want to try to create the topic once
    res = _publish_kafka(
        [i for (_id, i) in objs],  # strip out _ids, must be in the doc at this point
        schema,
        _type,
Example #12
from typing import Dict

from aet.exceptions import ConsumerHttpException
from aet.logger import get_logger
from aet.resource import BaseResource, Draft7Validator, ValidationError
from aet.jsonpath import CachedParser

from app.fixtures import schemas
from app.helpers import check_required, TransformationError
from app.helpers.js import JSHelper
from app.helpers.kafka import TopicHelper
from app.helpers.rest import RestHelper
from app.helpers.event import (TestEvent, ZeebeJob)
from app.helpers.pipeline import (PipelineContext, Transition)
from app.helpers.zb import ZeebeConnection

LOG = get_logger('transformers')


class Transformation(BaseResource):
    schema = schemas.BASIC
    name = '__transformation'  # should not be directly created...
    jobs_path = None

    public_actions = BaseResource.public_actions + ['test']

    def run(self, context: PipelineContext, transition: Transition) -> Dict:
        local_context = transition.prepare_input(context.data, self.definition)
        try:
            result = self.do_work(local_context)
            output = transition.prepare_output(result, self.definition)
            transition.check_failure(output)
Example #13
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

from typing import List

import firebase_admin

from aet.logger import get_logger

from .config import get_function_config
from . import fb_utils
from .fb_utils import InputSet, InputManager
from . import kafka_utils

_logger = get_logger('EXPORT')

CONF = get_function_config()
RTDB: fb_utils.RTDBTarget = None


class ExportManager():
    def __init__(self, rtdb=None):
        # set destination
        self._connect_firebase(rtdb)
        self.manager = InputManager(self.rtdb)

    def _connect_firebase(self, rtdb=None):
        if not rtdb:
            self._init_global_firebase()
            global RTDB
Example #14

import firebase_admin
from google.cloud import firestore

from aet.exceptions import ConsumerHttpException
from aet.job import BaseJob, JobStatus
from aet.kafka import KafkaConsumer, FilterConfig, MaskConfig
from aet.logger import callback_logger, get_logger
from aet.resource import BaseResource, lock
from werkzeug.local import LocalProxy

# Aether python lib
# from aether.python.avro.schema import Node

from app.config import get_consumer_config, get_kafka_config
from app.fixtures import schemas

from app import helpers

LOG = get_logger('artifacts')
CONSUMER_CONFIG = get_consumer_config()
KAFKA_CONFIG = get_kafka_config()


class FirebaseInstance(BaseResource):
    schema = schemas.FB_INSTANCE
    jobs_path = '$.firebase'
    name = 'firebase'
    public_actions = BaseResource.public_actions + [
        'test_connection'
    ]

    app: firebase_admin.App = None
    cfs: firestore.Client = None
    rtdb: helpers.RTDB = None
Example #15

import json
from typing import Any, Mapping

from requests import Session
from requests.exceptions import HTTPError

from aet.exceptions import ConsumerHttpException
from aet.logger import get_logger
from aether.python.avro.schema import Node

from . import config
from .processor import ES_RESERVED
from .visualization import (auto_visualizations, schema_defined_visualizations)
from . import utils

LOG = get_logger('INDEX')
consumer_config = config.get_consumer_config()
kafka_config = config.get_kafka_config()

# Kibana Change Handler


def kibana_handle_schema_change(
        tenant: str,
        alias_name: str,
        schema_old: Mapping[Any, Any],
        schema_new: Mapping[Any, Any],
        subscription: Mapping[str, Any],  # Subscription.definition
        es_index: Mapping[Any, Any],
        es_conn,
        kibana_conn):
Example #16
from uuid import uuid4

from aet.logger import get_logger
from aet.resource import ResourceDefinition

from aether.python.avro import generation
from aether.python.avro.schema import Node

from app import config
from app.fixtures import examples
from app.processor import ESItemProcessor
from app.artifacts import Subscription, ESJob, LocalESInstance

from app import consumer

CONSUMER_CONFIG = config.consumer_config
KAFKA_CONFIG = config.get_kafka_config()

LOG = get_logger('FIXTURE')

# Some of the fixtures are non-compliant so we don't QA this file.
# flake8: noqa

URL = 'http://localhost:9013'

# pick a random tenant for each run so we don't need to wipe ES.
TS = str(uuid4()).replace('-', '')[:8]
TENANT = f'TEN{TS}'
TEST_TOPIC = 'es_test_topic'

# instances of samples pushed to Kafka
GENERATED_SAMPLES = {}

Example #17
import os

from aet.exceptions import MessageHandlingException
from aet.job import BaseJob, JobManager, JobStatus
from aet.jsonpath import CachedParser  # noqa
from aet.kafka import KafkaConsumer
from aet.logger import get_logger
from aet.resource import (  # noqa
    BaseResource,
    BASE_PUBLIC_ACTIONS,
    lock,
    MethodDesc
)


from .assets.schemas import test_schemas

LOG = get_logger('Test')

here = os.path.dirname(os.path.realpath(__file__))

kafka_server = 'kafka-test:29092'
kafka_connection_retry = 10
kafka_connection_retry_wait = 6
# increasing topic_size may cause poll to be unable to get all the messages in one call.
# needs to be even and, if > 100, a multiple of 100.
topic_size = 500


TestResourceDef1 = {'id': '1', 'username': '******', 'password': '******'}


class BadResource(BaseResource):
Example #18

# under the License.

import pytest
import requests
import json
from time import sleep

from . import *  # noqa
from . import (  # noqa  # for the linter
    ElasticsearchConsumer, RequestClientT1, RequestClientT2, URL,
    check_local_es_readyness)

from aet.logger import get_logger
from app.fixtures import examples

LOG = get_logger('TEST')
'''
    API Tests
'''


@pytest.mark.unit
def test__consumer_add_delete_respect_tenants(ElasticsearchConsumer,
                                              RequestClientT1,
                                              RequestClientT2):
    res = RequestClientT1.post(f'{URL}/elasticsearch/add',
                               json=examples.ES_INSTANCE)
    assert (res.json() is True)
    res = RequestClientT1.get(f'{URL}/elasticsearch/list')
    assert (res.json() != [])
    res = RequestClientT2.get(f'{URL}/elasticsearch/list')
Example #19
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

from datetime import datetime, timedelta
import json

from aet.logger import get_logger
from aet.jsonpath import CachedParser
from aether.python.utils import replace_nested
from aether.python.avro.schema import Node

from .config import get_consumer_config, AVRO_TYPES

LOG = get_logger('PROCESS')
CONSUMER_CONFIG = get_consumer_config()

ES_RESERVED = [
    '_uid', '_id', '_type', '_source', '_all', '_field_names', '_routing',
    '_index', '_size', '_timestamp', '_ttl', '_version', '_submitted_at',
    '_surveyor'
]
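
# For illustration only (not part of the original module): one possible use of
# ES_RESERVED is scrubbing reserved metadata keys from a document before indexing.
def _strip_reserved(doc: dict) -> dict:
    return {k: v for k, v in doc.items() if k not in ES_RESERVED}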

AVRO_BASE_COERCE = {
    # avro_type -> handler
}

AVRO_LOGICAL_COERCE = {
    # logical_avro_type -> handler
    # int(days since epoch) -> iso_string