Example #1
0
def transform_for_bc(pipeline_: Pipeline) -> dict:
    """Build the payload dict that represents *pipeline_* for the BC backend.

    The 'schema' key is always populated: the stored schema when the
    pipeline has a schema id, otherwise a freshly built one.

    :param pipeline_: the pipeline to serialize
    :return: a plain dict ready to be sent to BC
    """
    # Shallow-copy before popping: the original implementation popped from
    # `pipeline_.config` itself (stored by reference in the payload), which
    # silently removed 'interval'/'delay' from the pipeline object as a
    # side effect of serializing it.
    config = dict(pipeline_.config)
    config.pop('interval', 0)
    config.pop('delay', 0)
    return {
        'pipeline_id': pipeline_.name,
        'created': int(pipeline_.created_at.timestamp()),
        'updated': int(pipeline_.last_edited.timestamp()),
        'status': pipeline_.status,
        'schemaId': pipeline_.get_schema_id(),
        'source': {
            'name': pipeline_.source.name,
            'type': pipeline_.source.type,
        },
        'scheduling': {
            'interval': pipeline_.interval,
            'delay': pipeline_.delay,
        },
        'progress': {
            # empty string when no offset has been stored yet
            'last_offset': pipeline_.offset.offset if pipeline_.offset else '',
        },
        # we need to always send schema even if the pipeline doesn't use it
        'schema': pipeline_.get_schema() if pipeline_.get_schema_id() else schema.build(pipeline_),
        'config': config,
    }
Example #2
0
def update(pipeline_: Pipeline, config_: dict = None):
    """Update an existing pipeline, optionally loading a new config first.

    Skips all work (extra setup, schema update, SDC update) when the
    pipeline's config has not changed. Runs inside a repository session.

    :param pipeline_: the pipeline to update
    :param config_: optional config dict to load into the pipeline (edit mode)
    """
    with pipeline.repository.SessionManager(pipeline_):
        if config_:
            _load_config(pipeline_, config_, is_edit=True)
        # nothing changed -> don't touch SDC or the schema backend
        if not pipeline_.config_changed():
            logger_.info(f'No need to update pipeline {pipeline_.name}')
            return
        extra_setup.do(pipeline_)
        if pipeline_.uses_schema():
            _update_schema(pipeline_)
        sdc_client.update(pipeline_)
        reset_pipeline_retries(pipeline_)
        logger_.info(f'Updated pipeline {pipeline_.name}')
Example #3
0
def build(pipeline: Pipeline) -> dict:
    """Assemble the schema document for *pipeline*.

    Optional keys ('dvpConfig', 'id') are included only when the pipeline
    provides them.

    :param pipeline: source pipeline for the schema
    :return: the schema as a plain dict
    """
    document = {
        'version': '1',
        'name': pipeline.name,
        'dimensions': pipeline.dimension_names,
        'measurements': _get_measurements(pipeline),
        'missingDimPolicy': {'action': 'fill', 'fill': 'NULL'},
    }
    if pipeline.dvp_config:
        document['dvpConfig'] = pipeline.dvp_config
    schema_id = pipeline.get_schema_id()
    if schema_id:
        document['id'] = schema_id
    return document
Example #4
0
def _create_metric(pipeline_: Pipeline, var_binds: list) -> dict:
    """Convert SNMP var-binds into a single metric dict.

    Each var-bind OID is classified as either a measurement or a dimension;
    anything else is ignored. Returns an empty dict when either the
    measurements or the dimensions end up empty.

    :param pipeline_: pipeline providing OID classification rules
    :param var_binds: list of (oid, value) pairs from an SNMP response
    :return: metric dict with measurements/dimensions/tags/schemaId/timestamp,
        or {} when nothing usable was extracted
    """
    measurements = {}
    dimensions = {}
    schema_id = pipeline_.get_schema_id()

    for var_bind in var_binds:
        oid = str(var_bind[0])
        logger_.debug(f'Processing OID: {oid}')
        if _is_value(oid, pipeline_):
            measurement_name = _get_measurement_name(var_bind[0], pipeline_)
            measurement_value = _get_value(var_bind, pipeline_)
            measurements[measurement_name] = measurement_value
            logger_.debug(
                f'Measurement `{measurement_name}` with a value: {measurement_value}'
            )
        elif _is_dimension(oid, pipeline_):
            dimension_name = _get_dimension_name(var_bind[0], pipeline_)
            dimensions[dimension_name] = str(var_bind[1])
            logger_.debug(
                f'Dimension `{dimension_name}` with a value: {str(var_bind[1])}'
            )

    # a metric is useless without both a measurement and a dimension
    if not measurements or not dimensions:
        logger_.warning('No metrics extracted')
        return {}
    return {
        'measurements': measurements,
        'schemaId': schema_id,
        'dimensions': dimensions,
        'tags': {},
        'timestamp': int(time.time()),
    }
Example #5
0
def _create_metrics(data: dict, pipeline_: Pipeline) -> list:
    """Build a list of metric dicts from raw source records.

    Field specs are built once, outside the loop, since they are identical
    for every record. A missing measurement aborts the whole batch with an
    exception when the pipeline is strict, otherwise it is only logged and
    the metrics collected so far are returned.

    :param data: iterable of raw records
    :param pipeline_: pipeline providing field configurations
    :return: list of metric dicts (possibly partial on extraction failure)
    """
    metrics = []
    # loop-invariant: build the extraction specs a single time
    dim_fields = field.build_fields(pipeline_.dimension_configurations)
    meas_fields = field.build_fields(pipeline_.measurement_configurations)
    tag_fields = field.build_fields(pipeline_.tag_configurations)
    schema_id = pipeline_.get_schema_id()
    try:
        for record in data:
            metric = {
                "timestamp": record[pipeline_.timestamp_name],
                "dimensions": field.extract_fields(dim_fields, record),
                "measurements": field.extract_fields(meas_fields, record, True),
                # tag values are wrapped into single-element lists
                "tags": {
                    name: [value]
                    for name, value in field.extract_fields(tag_fields, record).items()
                },
                "schemaId": schema_id,
            }
            metrics.append(metric)
    except NoMeasurementException as e:
        message = f'[{pipeline_.name}] - These values were not extracted from data: {e}'
        if pipeline_.is_strict:
            raise Exception(message) from e
        else:
            logger_.warning(message)
    return metrics
Example #6
0
def update_pipeline_watermark(pipeline_: Pipeline, timestamp: float):
    """Set the pipeline's watermark to *timestamp* and persist it.

    Creates a new PipelineWatermark when the pipeline does not have one yet.
    """
    existing = pipeline_.watermark
    if existing:
        existing.timestamp = timestamp
    else:
        pipeline_.watermark = pipeline.PipelineWatermark(
            pipeline_.name, timestamp)
    pipeline.repository.save(pipeline_.watermark)
Example #7
0
def reset(pipeline_: Pipeline):
    """Reset the pipeline in SDC and delete its stored offset.

    :param pipeline_: the pipeline to reset
    :raises pipeline.PipelineException: when the SDC API call fails
    """
    try:
        sdc_client.reset(pipeline_)
        if pipeline_.offset:
            pipeline.repository.delete_offset(pipeline_.offset)
            pipeline_.offset = None
    except sdc_client.ApiClientException as e:
        # re-wrap the client error into the domain exception callers expect
        raise pipeline.PipelineException(str(e)) from e
Example #8
0
def create(pipeline_: Pipeline, config_: dict = None):
    """Create a pipeline: load config, run setup, update schema, register in SDC.

    Runs inside a repository session. Notifications are created before the
    pipeline is created in SDC.

    :param pipeline_: the pipeline to create
    :param config_: optional config dict to load into the pipeline first
    """
    with pipeline.repository.SessionManager(pipeline_):
        if config_:
            _load_config(pipeline_, config_)
        extra_setup.do(pipeline_)
        if pipeline_.uses_schema():
            _update_schema(pipeline_)
        notifications.repository.create_notifications(pipeline_)
        sdc_client.create(pipeline_)
Example #9
0
def update_pipeline_offset(pipeline_: Pipeline, timestamp: float):
    """Fetch the current offset from SDC and persist it on the pipeline.

    Does nothing when SDC reports no offset. Creates a PipelineOffset
    record when the pipeline does not have one yet.
    """
    new_offset = sdc_client.get_pipeline_offset(pipeline_)
    if not new_offset:
        return
    existing = pipeline_.offset
    if existing:
        existing.offset = new_offset
        existing.timestamp = timestamp
    else:
        pipeline_.offset = pipeline.PipelineOffset(
            pipeline_.id, new_offset, timestamp)
    pipeline.repository.save(pipeline_.offset)
Example #10
0
def _get_config_loader(pipeline_: Pipeline):
    """Pick the config loader class matching the pipeline's concrete type.

    Type checks run in a fixed order; schema vs. no-schema is the fallback
    for plain pipelines.
    """
    loaders_by_type = (
        (pipeline.TestPipeline, pipeline.config.loader.TestPipelineConfigLoader),
        (pipeline.RawPipeline, pipeline.config.loader.RawConfigLoader),
        (pipeline.EventsPipeline, pipeline.config.loader.EventsConfigLoader),
        (pipeline.TopologyPipeline, pipeline.config.loader.TopologyConfigLoader),
    )
    for pipeline_type, loader in loaders_by_type:
        if isinstance(pipeline_, pipeline_type):
            return loader
    if pipeline_.uses_schema():
        return pipeline.config.loader.SchemaConfigLoader
    return pipeline.config.loader.NoSchemaConfigLoader
Example #11
0
def get_config_handler(pipeline_: Pipeline) -> ConfigHandler:
    """Build the ConfigHandler matching the pipeline's concrete type.

    Loads the base config via the type-appropriate loader first, then
    dispatches on the pipeline type; schema vs. no-schema is the fallback
    for plain pipelines.
    """
    base_config = _get_config_loader(pipeline_).load_base_config(pipeline_)
    handlers_by_type = (
        (pipeline.TopologyPipeline, _get_topology_handler),
        (pipeline.RawPipeline, _get_raw_handler),
        (pipeline.TestPipeline, _get_test_handler),
        (pipeline.EventsPipeline, _get_events_handler),
    )
    for pipeline_type, handler_factory in handlers_by_type:
        if isinstance(pipeline_, pipeline_type):
            return handler_factory(pipeline_, base_config)
    if pipeline_.uses_schema():
        return _get_schema_handler(pipeline_, base_config)
    return _get_no_schema_handler(pipeline_, base_config)
Example #12
0
def _get_tags_expressions(pipeline_: Pipeline) -> list:
    """Build the list of expression configs that populate the /tags record field.

    Starts with an expression ensuring /tags is a map, then for every tag
    adds an empty-list initializer plus one indexed entry per tag value.
    """
    expressions = [
        get_value('/tags', 'record:value("/tags") == NULL ? emptyMap() : record:value("/tags")')
    ]
    for tag_name, tag_values in pipeline_.get_tags().items():
        expressions.append(get_value(f'/tags/{tag_name}', 'emptyList()'))
        for idx, val in enumerate(tag_values):
            expressions.append(get_value(f'/tags/{tag_name}[{idx}]', f'"{val}"'))
    return expressions
Example #13
0
def _construct(pipeline_: Pipeline) -> Pipeline:
    """Finish constructing a pipeline: ensure a destination, build source, set concrete type.

    :param pipeline_: partially constructed pipeline
    :return: the fully constructed pipeline (same object, possibly retyped)
    """
    if not pipeline_.destination:
        # this is needed for raw pipelines
        pipeline_.destination = HttpDestination()
    return _construct_pipeline(_construct_source(pipeline_))
Example #14
0
def increase_retry_counter(pipeline_: Pipeline):
    """Increment the pipeline's error-status retry counter and persist it.

    Lazily creates the PipelineRetries record on first use.
    """
    retries = pipeline_.retries
    if not retries:
        retries = PipelineRetries(pipeline_)
        pipeline_.retries = retries
    retries.number_of_error_statuses += 1
    pipeline.repository.save(retries)
Example #15
0
def create_pipeline(pipeline_id: str, source_name: str) -> Pipeline:
    """Instantiate a Pipeline for the named source using the stored destination.

    :param pipeline_id: identifier for the new pipeline
    :param source_name: name of an existing source to look up
    :return: the newly constructed Pipeline
    """
    source_ = source.repository.get_by_name(source_name)
    destination_ = destination.repository.get()
    return Pipeline(pipeline_id, source_, destination_)
Example #16
0
def should_send_error_notification(pipeline_: Pipeline) -> bool:
    """Return whether an error notification should be sent for this pipeline.

    Notifications are globally suppressed via the
    DISABLE_PIPELINE_ERROR_NOTIFICATIONS constant; otherwise defer to the
    pipeline's own setting.
    """
    if constants.DISABLE_PIPELINE_ERROR_NOTIFICATIONS:
        return False
    return pipeline_.error_notification_enabled()
Example #17
0
 def _check_pipeline(self, pipeline_: Pipeline):
     """Validate that the pipeline uses a schema.

     :raises AssertionError: when the pipeline does not use a schema
     """
     # `assert` statements are stripped under `python -O`, so the check is
     # raised explicitly; AssertionError is kept for backward compatibility
     # with any callers that catch it.
     if not pipeline_.uses_schema():
         raise AssertionError(f'Pipeline {pipeline_.name} must use a schema')
Example #18
0
def _update_schema(pipeline_: Pipeline):
    """Rebuild the pipeline's schema and push an update when it changed.

    No-op when the pipeline has no existing schema.
    NOTE(review): nothing is created on the no-schema path — presumably
    initial schema creation happens elsewhere (e.g. during config loading);
    confirm before relying on this.
    """
    old_schema = pipeline_.get_schema()
    if not old_schema:
        # The previous version built the new schema here and then discarded
        # it; schema.build is pure construction, so skipping it entirely is
        # both safe and cheaper.
        return
    new_schema = schema.build(pipeline_)
    if not schema.equal(old_schema, new_schema):
        pipeline_.schema = schema.update(new_schema)
Example #19
0
def _delete_schema(pipeline_: Pipeline):
    """Delete the pipeline's schema from the backend and clear it locally.

    Does nothing when the pipeline has no schema.
    """
    if not pipeline_.has_schema():
        return
    schema.delete(pipeline_.get_schema_id())
    pipeline_.schema = {}
Example #20
0
def _construct_pipeline(pipeline_: Pipeline) -> Pipeline:
    """Retype the pipeline instance to its concrete class from pipeline.TYPES.

    :param pipeline_: pipeline whose `type` selects the concrete class
    :return: the same object, with its __class__ swapped
    """
    concrete_class = pipeline.TYPES[pipeline_.type]
    pipeline_.__class__ = concrete_class
    return pipeline_