Beispiel #1
0
def discover(tap_name, disable_colors: bool = False):
    """Run the `discover` node of the pipeline that belongs to a singer tap.

    Args:
        tap_name: Name of the singer tap. Dashes are replaced by underscores
            to build the id of the pipeline looked up below `_singer`.
        disable_colors: Passed through to `mara_pipelines.ui.cli.run_pipeline`.

    Exits the process with status -1 when the pipeline cannot be found, when
    the found node is not a pipeline, when the pipeline has no `discover`
    node, or when the pipeline run reports failure.
    """

    # resolve the root pipeline through the internal root pipeline
    patch(
        mara_pipelines.config.root_pipeline)(lambda: _internal_root_pipeline())

    # the pipeline to run
    pipeline, found = pipelines.find_node(
        ['_singer', tap_name.replace('-', '_')])
    if not found:
        print(
            f'Could not find pipeline. You have to add {tap_name} to config mara_singer.config.tap_names to be able to use this command',
            file=sys.stderr)
        sys.exit(-1)
    if not isinstance(pipeline, pipelines.Pipeline):
        print(
            f'Internal error: Node is not a pipeline, but a {pipeline.__class__.__name__}',
            file=sys.stderr)
        sys.exit(-1)

    # the single node to run selectively in the pipeline; fail explicitly
    # instead of handing a set containing None to run_pipeline
    discover_node = pipeline.nodes.get('discover')
    if discover_node is None:
        print(
            f'Internal error: Pipeline for {tap_name} has no node "discover"',
            file=sys.stderr)
        sys.exit(-1)
    nodes = {discover_node}

    if not mara_pipelines.ui.cli.run_pipeline(pipeline,
                                              nodes,
                                              interactively_started=False,
                                              disable_colors=disable_colors):
        sys.exit(-1)
Beispiel #2
0
def client_unauth():
    """Yield from `_client()` while access to the documentation is denied.

    While the generator is being consumed,
    `mara_page.acl.current_user_has_permissions` is patched so that every
    resource is reported as permitted except
    `mara_markdown_docs.docs.documentation_acl_resource`. The previously
    active function is patched back when the generator finishes, including
    when the consumer raises or closes the generator early.
    """
    import mara_page.acl
    import mara_markdown_docs.docs
    orig_func = mara_page.acl.current_user_has_permissions

    @patch(mara_page.acl.current_user_has_permissions)
    def current_user_has_permissions(resources):
        def _exclude_docs(resource):
            # only the documentation resource is denied
            if resource is mara_markdown_docs.docs.documentation_acl_resource:
                return [resource, False]
            return [resource, True]

        return list(map(_exclude_docs, resources))

    try:
        yield from _client()
    finally:
        # always undo the patch so that later users of the ACL function are
        # not affected by a failure inside the consumer
        patch(mara_page.acl.current_user_has_permissions)(orig_func)
Beispiel #3
0
import os
from data_integration.pipelines import Pipeline, Task
from data_integration.ui.cli import run_pipeline
import mara_db.auto_migration
import mara_db.config
import mara_db.dbs
import data_integration
import data_integration.config
from mara_app.monkey_patch import patch
from bcreg.bcreg_pipelines import bc_reg_root_pipeline

# Override the system statistics collection period with a constant 15
# NOTE(review): the unit is not visible here — presumably seconds; confirm
patch(data_integration.config.system_statistics_collection_period)(lambda: 15)


@patch(data_integration.config.root_pipeline)
def root_pipeline():
    """Return the pipeline built by `bc_reg_root_pipeline()`."""
    pipeline = bc_reg_root_pipeline()
    return pipeline


# Connection settings of the mara database, read from the environment
mara_host = os.getenv('MARA_DB_HOST', 'bcregdb')
mara_database = os.getenv('MARA_DB_DATABASE', 'mara_db')
mara_port = os.getenv('MARA_DB_PORT', '5432')
mara_user = os.getenv('MARA_DB_USER', 'mara_db')
mara_password = os.getenv('MARA_DB_PASSWORD')  # no fallback: None when unset

# Replace the database configuration with a single 'mara' PostgreSQL connection
mara_db.config.databases = lambda: {
    'mara': mara_db.dbs.PostgreSQLDB(
        host=mara_host,
        port=mara_port,
        database=mara_database,
        user=mara_user,
        password=mara_password,
    ),
}

(child_pipeline,
 success) = data_integration.pipelines.find_node(['bc_reg_event_processor'])
if success:
Beispiel #4
0
    mara_host = os.environ.get('MARA_DB_HOST', 'bcregdb')
    mara_database = os.environ.get('MARA_DB_DATABASE', 'mara_db')
    mara_port = os.environ.get('MARA_DB_PORT', '5432')
    mara_user = os.environ.get('MARA_DB_USER', 'mara_db')
    mara_password = os.environ.get('MARA_DB_PASSWORD')

    return {
        'mara':
        mara_db.dbs.PostgreSQLDB(user=mara_user,
                                 password=mara_password,
                                 host=mara_host,
                                 database=mara_database,
                                 port=mara_port)
    }


# How many cores to use for running the ETL (defaults to the number of CPUs of the machine).
# On production, make sure the ETL does not slow down other services too much.
patch(data_integration.config.max_number_of_parallel_tasks)(lambda: 4)

# The first day for which to download and process data (default 2017-01-01).
# Here: five days before the current date.
# Locally, a few days of data are enough to test a pipeline.
# On production, the number of days that can be processed depends on the machine size.
# One year of data amounts to roughly 50 GB of database size.
patch(app.config.first_date)(
    lambda: datetime.date.today() - datetime.timedelta(days=5))

# Whether it is possible to run the ETL from the web UI.
# Disable on production.
patch(data_integration.config.allow_run_from_web_ui)(lambda: True)
"""Configures the data integration pipelines of the project"""

import datetime
import functools

import data_integration.config
from data_integration.pipelines import Pipeline, Task
from mara_app.monkey_patch import patch

import app.config

from bcreg.bcreg_pipelines import bc_reg_root_pipeline

# Delegate the data directory and the first date to the application config,
# looked up again on every call
patch(data_integration.config.data_dir)(lambda: app.config.data_dir())
patch(data_integration.config.first_date)(lambda: app.config.first_date())
# Use the 'dwh' alias as the default database
patch(data_integration.config.default_db_alias)(lambda: 'dwh')
# NOTE(review): the unit of the collection period is not visible here — presumably seconds; confirm
patch(data_integration.config.system_statistics_collection_period)(lambda: 15)


@patch(data_integration.config.root_pipeline)
@functools.lru_cache(maxsize=None)
def root_pipeline():
    """Return the pipeline built by `bc_reg_root_pipeline()`.

    The result is memoized through `functools.lru_cache`, so the pipeline is
    only constructed on the first call.
    """
    pipeline = bc_reg_root_pipeline()
    return pipeline
    for data_set in mt_data_sets():
        personal_data_column_names = []
        default_column_names = []
        for path, attributes in data_set.connected_attributes().items():
            for prefixed_name, attribute in attributes.items():
                if attribute.personal_data:
                    personal_data_column_names.append(prefixed_name)
                if attribute.important_field:
                    default_column_names.append(prefixed_name)

        for metric in data_set.metrics.values():
            if metric.important_field:
                default_column_names.append(metric.name)

        result.append(
            mara_data_explorer.data_set.DataSet(
                id=data_set.id(),
                name=data_set.name,
                database_alias='dwh',
                database_schema='data_sets',
                database_table=data_set.id(),
                personal_data_column_names=personal_data_column_names,
                default_column_names=default_column_names,
                use_attributes_table=True))
    return result


# Chart color used by the data explorer; adapt it to your company's preferred color
patch(mara_data_explorer.config.charts_color)(lambda: '#0275d8')
Beispiel #7
0
def test_state_read_not_existing_file():
    """Reading a bookmark for a tap without a state file yields None."""
    # look for state files in the tests directory
    patch(config.state_dir)(lambda: './tests/')
    state = SingerTapState(tap_name='does_not_exist-state')
    bk_value = state.get_bookmark(tap_stream_id='STREAM_NAME', key='date')
    # identity check: None is a singleton and must not be compared with ==
    assert bk_value is None
Beispiel #8
0
def test_state_read_sample_state_file():
    """The `date` bookmark of stream STREAM_NAME is read from the sample state."""
    # look for state files in the tests directory
    patch(config.state_dir)(lambda: './tests/')
    sample_state = SingerTapState(tap_name='sample-state1')
    expected_bookmark = '2020-01-01T00:00:00.000000Z'
    actual_bookmark = sample_state.get_bookmark(tap_stream_id='STREAM_NAME',
                                                key='date')
    assert actual_bookmark == expected_bookmark
                '# Closed pull requests'
            ],
            use_attributes_table=True),
        mara_data_explorer.data_set.DataSet(
            id='github-repo-activity',
            name='Github repo activities',
            database_alias='dwh',
            database_schema='gh_dim',
            database_table='repo_activity_data_set',
            default_column_names=[
                'Date', 'User', 'Repo', '# Forks', '# Commits',
                '# Closed pull requests'
            ],
            use_attributes_table=True),
        mara_data_explorer.data_set.DataSet(
            id='pypi-download-counts',
            name='PyPI download counts',
            database_alias='dwh',
            database_schema='pypi_dim',
            database_table='download_counts_data_set',
            default_column_names=[
                'Download date', 'Project', 'Project version', 'Installer',
                'Python version', '# Downloads'
            ],
            use_attributes_table=True),
    ]


# Chart color used by the data explorer; adapt it to your company's preferred color
patch(mara_data_explorer.config.charts_color)(lambda: '#008000')
Beispiel #10
0
import mara_page.acl
from mara_app import monkey_patch
from mara_page import acl
from mara_page import navigation

from app.ui import start_page

# Flask blueprint 'ui', mounted below /ui, with its own static folder
blueprint = flask.Blueprint(
    'ui', __name__, url_prefix='/ui', static_folder='static',
)

# Blueprints exposed by this package: the start page and the 'ui' blueprint
MARA_FLASK_BLUEPRINTS = [
    start_page.blueprint,
    blueprint,
]

# Replace logo and favicon with the files from the static folder of the 'ui' blueprint.
# The URLs are built lazily inside the lambdas, i.e. when the patched functions are called.
monkey_patch.patch(mara_app.config.favicon_url)(
    lambda: flask.url_for('ui.static', filename='favicon.ico'))
monkey_patch.patch(mara_app.config.logo_url)(
    lambda: flask.url_for('ui.static', filename='logo.png'))


# add custom css
@monkey_patch.wrap(mara_app.layout.css_files)
def css_files(original_function, response):
    """Append the project's `styles.css` to the CSS files of the wrapped function.

    Args:
        original_function: The wrapped `mara_app.layout.css_files` function.
        response: Passed unchanged to `original_function`.

    Returns:
        The object returned by `original_function`, with the URL of
        `styles.css` from the `ui.static` endpoint appended in place.
    """
    stylesheets = original_function(response)
    custom_stylesheet = flask.url_for('ui.static', filename='styles.css')
    stylesheets.append(custom_stylesheet)
    return stylesheets


# define protected ACL resources
@monkey_patch.patch(mara_acl.config.resources)
def acl_resources():
Beispiel #11
0
"""Configures the data integration pipelines of the project"""

import datetime
import functools

import data_integration.config
from data_integration.pipelines import Pipeline, Task
from mara_app.monkey_patch import patch

import app.config

from bcreg.bcreg_pipelines import db_init_pipeline, bc_reg_pipeline, bc_reg_pipeline_status, bc_reg_pipeline_initial_load, bc_reg_pipeline_post_credentials
from bcreg.bcreg_pipelines import bc_init_test_data, bc_reg_test_corps, bc_reg_pipeline_jsonbender

# Delegate the data directory and the first date to the application config,
# looked up again on every call
patch(data_integration.config.data_dir)(lambda: app.config.data_dir())
patch(data_integration.config.first_date)(lambda: app.config.first_date())
# Use the 'dwh' alias as the default database
patch(data_integration.config.default_db_alias)(lambda: 'dwh')


@patch(data_integration.config.root_pipeline)
@functools.lru_cache(maxsize=None)
def root_pipeline():

    parent_pipeline = Pipeline(
        id='holder_for_pipeline_versions',
        description=
        'Holder for the different versions of the BC Registries pipeline.')

    parent_pipeline.add(bc_reg_pipeline())
    parent_pipeline.add(bc_reg_pipeline_status())
Beispiel #12
0
"""Configures the data integration pipelines of the project"""

import datetime
import functools

import mara_pipelines.config
import etl_tools.config
from mara_pipelines.pipelines import Pipeline
from mara_app.monkey_patch import patch

import app.config

# Delegate the data directory and the first date to the application config,
# looked up again on every call
patch(mara_pipelines.config.data_dir)(lambda: app.config.data_dir())
patch(mara_pipelines.config.first_date)(lambda: app.config.first_date())
# Use the 'dwh' alias as the default database
patch(mara_pipelines.config.default_db_alias)(lambda: 'dwh')


@patch(mara_pipelines.config.root_pipeline)
@functools.lru_cache(maxsize=None)
def root_pipeline():
    import app.pipelines.initialize_db
    import app.pipelines.load_data.load_ecommerce_data
    import app.pipelines.load_data.load_marketing_data
    import app.pipelines.e_commerce
    import app.pipelines.marketing
    import app.pipelines.generate_artifacts
    import app.pipelines.update_frontends
    import app.pipelines.consistency_checks
    import app.pipelines.update_frontends

    pipeline = Pipeline(

@patch(data_sets.config.data_sets)
def _data_sets():
    """Return the three data sets that replace `data_sets.config.data_sets`.

    All data sets live in the 'dwh' database alias and use the attributes
    table; they differ in id, name, schema, table and default columns.
    """
    # (id, name, database schema, database table, default column names)
    specs = [
        ('python-project-activity', 'Python project activities',
         'pp_dim', 'python_project_activity_data_set',
         ['Date', 'Project',
          '# Downloads', '# Forks', '# Commits', '# Closed pull requests']),
        ('github-repo-activity', 'Github repo activities',
         'gh_dim', 'repo_activity_data_set',
         ['Date', 'User', 'Repo',
          '# Forks', '# Commits', '# Closed pull requests']),
        ('pypi-download-counts', 'PyPI download counts',
         'pypi_dim', 'download_counts_data_set',
         ['Download date', 'Project', 'Project version',
          'Installer', 'Python version', '# Downloads']),
    ]

    return [
        data_sets.data_set.DataSet(
            id=data_set_id, name=label,
            database_alias='dwh', database_schema=schema, database_table=table,
            default_column_names=column_names,
            use_attributes_table=True)
        for data_set_id, label, schema, table, column_names in specs
    ]

# Chart color used by the data sets UI; adapt it to your company's preferred color
patch(data_sets.config.charts_color)(lambda: '#008000')