예제 #1
0
def get_or_get_and_create_output_dataset(transform, input_dataset):
    """Return the DataSet that the output of ``transform`` is written to.

    The output group is taken from the transform's ``output`` section and
    falls back to the input data set's group; the output type always comes
    from the transform. The admin API is asked for a data set with that
    group/type pair, and when none is returned a new one is created that
    reuses the input data set's bearer token, ``realtime``, ``published``
    and ``max_age_expected`` settings (plus ``capped_size`` when it is
    present and truthy).

    :param transform: dict with an ``output`` dict holding ``data-type``
        and, optionally, ``data-group``.
    :param input_dataset: dict describing the data set being transformed.
    :returns: the result of ``DataSet.from_group_and_type`` for the output
        group/type, using ``config.BACKDROP_WRITE_URL`` and the output
        data set's bearer token.
    """
    output_spec = transform['output']
    output_group = output_spec.get('data-group', input_dataset['data_group'])
    output_type = output_spec['data-type']

    stagecraft = AdminAPI(config.STAGECRAFT_URL, config.STAGECRAFT_OAUTH_TOKEN)
    output_config = stagecraft.get_data_set(output_group, output_type)

    if not output_config:
        # No existing output data set: describe a new one that inherits
        # its access and freshness settings from the input data set.
        new_config = {'data_type': output_type, 'data_group': output_group}
        for inherited_key in ('bearer_token', 'realtime', 'published',
                              'max_age_expected'):
            new_config[inherited_key] = input_dataset[inherited_key]

        capped_size = input_dataset.get('capped_size')
        if capped_size:
            new_config['capped_size'] = capped_size

        output_config = stagecraft.create_data_set(new_config)

    return DataSet.from_group_and_type(
        config.BACKDROP_WRITE_URL,
        output_group,
        output_type,
        token=output_config['bearer_token'],
    )
예제 #2
0
def get_or_get_and_create_output_dataset(transform, input_dataset):
    """Look up the transform's output data set, creating it if necessary.

    :param transform: dict whose ``output`` entry names the output
        ``data-type`` and, optionally, the output ``data-group`` (the input
        data set's group is used when it is absent).
    :param input_dataset: dict describing the input data set; a newly
        created output data set copies its ``bearer_token``, ``realtime``,
        ``published`` and ``max_age_expected`` values, and ``capped_size``
        when that is present and truthy.
    :returns: the result of ``DataSet.from_group_and_type`` for the output
        group/type, using ``config.BACKDROP_WRITE_URL`` and the bearer
        token of the found or newly created data set.
    """
    output_group = transform['output'].get(
        'data-group', input_dataset['data_group'])
    output_type = transform['output']['data-type']

    def _config_for_new_data_set():
        # Settings for an output data set that does not exist yet.
        settings = dict(
            data_type=output_type,
            data_group=output_group,
            bearer_token=input_dataset['bearer_token'],
            realtime=input_dataset['realtime'],
            published=input_dataset['published'],
            max_age_expected=input_dataset['max_age_expected'],
        )
        if 'capped_size' in input_dataset:
            capped_size = input_dataset['capped_size']
            if capped_size:
                settings['capped_size'] = capped_size
        return settings

    api = AdminAPI(config.STAGECRAFT_URL, config.STAGECRAFT_OAUTH_TOKEN)
    data_set = api.get_data_set(output_group, output_type)
    if not data_set:
        data_set = api.create_data_set(_config_for_new_data_set())

    return DataSet.from_group_and_type(
        config.BACKDROP_WRITE_URL,
        output_group,
        output_type,
        token=data_set['bearer_token'],
    )
예제 #3
0
def get_module_choices():
    """Return ``(id, name)`` choice tuples for the known module types.

    The list always starts with a blank ``('', '')`` choice. When the
    ``TESTING`` environment variable is set to a non-empty value, only the
    blank choice is returned and the admin API is not contacted. A
    ``requests.ConnectionError`` while fetching the module types is
    swallowed when ``app.config['DEBUG']`` is truthy and re-raised
    otherwise.
    """
    choices = [('', '')]

    if getenv('TESTING', False):
        return choices

    try:
        # No token is passed: this is an unauthenticated client.
        stagecraft = AdminAPI(app.config['STAGECRAFT_HOST'], None)
        module_types = stagecraft.list_module_types()
        choices.extend(
            [(module['id'], module['name']) for module in module_types])
    except requests.ConnectionError:
        if not app.config['DEBUG']:
            raise

    return choices
예제 #4
0
def entrypoint(dataset_id, earliest, latest):
    """
    Fetch the transforms registered for ``dataset_id`` from stagecraft
    and send one ``run_transform`` task per transform.

    Each task receives the data set's configuration, the transform, and
    the ``earliest``/``latest`` bounds unchanged.
    """
    task_name = 'backdrop.transformers.dispatch.run_transform'

    stagecraft = AdminAPI(config.STAGECRAFT_URL, config.STAGECRAFT_OAUTH_TOKEN)
    pending_transforms = stagecraft.get_data_set_transforms(dataset_id)
    source_config = stagecraft.get_data_set_by_name(dataset_id)

    for pending in pending_transforms:
        task_args = (source_config, pending, earliest, latest)
        app.send_task(task_name, args=task_args)
예제 #5
0
def entrypoint(dataset_id, earliest, latest):
    """
    Fetch the transforms registered for ``dataset_id`` from stagecraft,
    send one ``run_transform`` task per transform, and increment the
    ``dispatch`` stats counter once for every task sent.

    Each task receives the data set's configuration, the transform, and
    the ``earliest``/``latest`` bounds unchanged.
    """
    task_name = 'backdrop.transformers.dispatch.run_transform'

    stagecraft = AdminAPI(config.STAGECRAFT_URL, config.STAGECRAFT_OAUTH_TOKEN)
    pending_transforms = stagecraft.get_data_set_transforms(dataset_id)
    source_config = stagecraft.get_data_set_by_name(dataset_id)

    for pending in pending_transforms:
        task_args = (source_config, pending, earliest, latest)
        app.send_task(task_name, args=task_args)
        stats_client.incr('dispatch')
예제 #6
0
def compute(new_data, transform, data_set_config):
    """Build a latest-value record for each published dashboard.

    The newest datum in ``new_data`` (by ``_timestamp``) is selected and one
    output record is produced for every published dashboard the data set
    appears on, provided the datum's value is not ``None``.

    :param new_data: list of dicts, each carrying a ``_timestamp`` key and
        the value key mapped from the data set's data type. The list is
        sorted in place, newest first.
    :param transform: transform configuration; passed to ``is_latest_data``.
    :param data_set_config: dict with at least ``data_type`` and ``name``.
    :returns: list of record dicts. Empty when ``new_data`` is empty, when
        ``is_latest_data`` reports that the newest datum is not the latest
        data (a back fill), or when no dashboard qualifies.
    """
    # Nothing to summarise; also avoids an IndexError on new_data[0].
    if not new_data:
        return []

    # Sort the new data by timestamp and use the latest data point.
    new_data.sort(key=lambda item: item['_timestamp'], reverse=True)
    latest_datum = new_data[0]

    # Only continue if we are not back filling data: older data must not
    # overwrite the stored latest value.
    if not is_latest_data(data_set_config, transform, latest_datum):
        return []

    # Input data won't have a unique key for each type of value.
    # E.g. completion rate and digital takeup are both "rate".
    # Use the data_type as the value key in the output, and map
    # the data_type to the expected key to get the value.
    value_key = data_type_to_value_mappings[data_set_config['data_type']]

    # A dataset may be present on multiple dashboards. Produce a
    # latest value for each published dashboard, keyed by slug.
    admin_api = AdminAPI(config.STAGECRAFT_URL, config.STAGECRAFT_OAUTH_TOKEN)
    configs = admin_api.get_data_set_dashboard(data_set_config['name'])

    # New dataset name convention uses underscores. The str method is used
    # instead of string.replace(), which only exists on Python 2.
    data_type = data_set_config['data_type'].replace('-', '_')

    latest_values = []
    for dashboard_config in configs:
        if not dashboard_config['published']:
            continue
        if latest_datum[value_key] is None:
            continue

        slug = dashboard_config['slug']
        latest_values.append({
            '_id': encode_id(slug, data_type),
            'dashboard_slug': slug,
            data_type: latest_datum[value_key],
            '_timestamp': latest_datum['_timestamp'],
            'service_id': slug
        })

    return latest_values
def compute(new_data, transform, data_set_config):
    """Return the latest value of a data set for each published dashboard.

    :param new_data: list of dicts with a ``_timestamp`` key and the value
        key mapped from the data set's data type. Sorted in place so the
        newest datum comes first.
    :param transform: transform configuration; passed to ``is_latest_data``.
    :param data_set_config: dict with at least ``data_type`` and ``name``.
    :returns: list with one record dict per published dashboard, or an
        empty list when ``new_data`` is empty, when ``is_latest_data``
        reports the newest datum is not the latest data (a back fill), or
        when the newest datum's value is ``None``.
    """
    # An empty batch has no latest datum; return early rather than let
    # new_data[0] raise IndexError.
    if not new_data:
        return []

    # Sort the new data by timestamp and use the latest data point.
    new_data.sort(key=lambda item: item['_timestamp'], reverse=True)
    latest_datum = new_data[0]

    # Only continue if we are not back filling data, so that older data
    # does not replace the stored latest value.
    if not is_latest_data(data_set_config, transform, latest_datum):
        return []

    # Input data won't have a unique key for each type of value.
    # E.g. completion rate and digital takeup are both "rate".
    # Use the data_type as the value key in the output, and map
    # the data_type to the expected key to get the value.
    value_key = data_type_to_value_mappings[data_set_config['data_type']]

    # A dataset may be present on multiple dashboards. Produce a
    # latest value for each published dashboard, keyed by slug.
    admin_api = AdminAPI(config.STAGECRAFT_URL, config.STAGECRAFT_OAUTH_TOKEN)
    latest_values = []
    configs = admin_api.get_data_set_dashboard(data_set_config['name'])

    # New dataset name convention uses underscores. str.replace works on
    # both Python 2 and 3, unlike the Python 2-only string.replace().
    data_type = data_set_config['data_type'].replace('-', '_')

    for dashboard_config in configs:
        if (dashboard_config['published']
                and latest_datum[value_key] is not None):
            slug = dashboard_config['slug']
            record_id = encode_id(slug, data_type)
            latest_values.append({
                '_id': record_id,
                'dashboard_slug': slug,
                data_type: latest_datum[value_key],
                '_timestamp': latest_datum['_timestamp'],
                'service_id': slug
            })

    return latest_values
예제 #8
0
def get_or_get_and_create_output_dataset(transform, input_dataset):
    """Return the DataSet that the output of ``transform`` is written to.

    The admin API is asked for a data set matching the output group/type.
    When none is returned, a new one is created from a copy of the input
    data set's configuration with the group and type overridden and the
    ``name`` entry removed.

    :param transform: dict with an ``output`` dict holding ``data-type``
        and, optionally, ``data-group`` (defaults to the input data set's
        group).
    :param input_dataset: dict describing the input data set; it is not
        modified.
    :returns: the result of ``DataSet.from_group_and_type`` for the output
        group/type, using ``config.BACKDROP_WRITE_URL`` and the output
        data set's bearer token.
    """
    output_group = transform['output'].get('data-group',
                                           input_dataset['data_group'])
    output_type = transform['output']['data-type']

    admin_api = AdminAPI(
        config.STAGECRAFT_URL,
        config.STAGECRAFT_OAUTH_TOKEN,
    )
    output_data_set_config = admin_api.get_data_set(output_group, output_type)
    if not output_data_set_config:
        # Copy-then-update instead of concatenating items(): on Python 3
        # items() returns views, which do not support "+".
        data_set_config = dict(input_dataset)
        data_set_config.update({
            'data_type': output_type,
            'data_group': output_group,
        })
        # Drop the input data set's name; tolerate it being absent.
        # NOTE(review): presumably the new data set gets its own name from
        # the admin API -- confirm.
        data_set_config.pop('name', None)
        output_data_set_config = admin_api.create_data_set(data_set_config)

    return DataSet.from_group_and_type(
        config.BACKDROP_WRITE_URL,
        output_group,
        output_type,
        token=output_data_set_config['bearer_token'],
    )
예제 #9
0
        'name': "number_of_digital_transactions",
        'ignore': 'quarterly'
    },
    {
        'name': "number_of_transactions",
        'ignore': 'quarterly'
    },
    {
        'name': "total_cost",
        'ignore': 'quarterly'
    },
]

# NOTE(review): presumably extra field names handled alongside the data
# point names listed above; their use is not visible in this excerpt --
# confirm against the rest of the module.
ADDITIONAL_FIELDS = ["end_at", "period", "service_id", "type"]

# Module-level admin API client, constructed at import time from config.
admin_api = AdminAPI(config.STAGECRAFT_URL, config.STAGECRAFT_OAUTH_TOKEN)


def _get_latest_data_point(sorted_data, data_point_name):
    def _use_data_point(data_point, name, ignore):
        should_not_be_ignored = (ignore != data_point['type'])
        return should_not_be_ignored

    name = data_point_name['name']
    ignore = data_point_name['ignore']

    # sorted_data should be pre sorted so
    # the first returned is always the most recent
    for data_point in sorted_data:
        if _use_data_point(data_point, name, ignore):
            return data_point