def transform_for_bc(pipeline_: Pipeline) -> dict:
    data = {
        'pipeline_id': pipeline_.name,
        'created': int(pipeline_.created_at.timestamp()),
        'updated': int(pipeline_.last_edited.timestamp()),
        'status': pipeline_.status,
        'schemaId': pipeline_.get_schema_id(),
        'source': {
            'name': pipeline_.source.name,
            'type': pipeline_.source.type,
        },
        'scheduling': {
            'interval': pipeline_.interval,
            'delay': pipeline_.delay,
        },
        'progress': {
            'last_offset': pipeline_.offset.offset if pipeline_.offset else '',
        },
        # we must always send a schema, even if the pipeline doesn't use one
        'schema': pipeline_.get_schema() if pipeline_.get_schema_id() else schema.build(pipeline_),
        # copy the config so the pops below don't mutate the pipeline's own config
        'config': dict(pipeline_.config),
    }
    data['config'].pop('interval', None)
    data['config'].pop('delay', None)
    return data
def update(pipeline_: Pipeline, config_: dict = None):
    with pipeline.repository.SessionManager(pipeline_):
        if config_:
            _load_config(pipeline_, config_, is_edit=True)
        if not pipeline_.config_changed():
            logger_.info(f'No need to update pipeline {pipeline_.name}')
            return
        extra_setup.do(pipeline_)
        if pipeline_.uses_schema():
            _update_schema(pipeline_)
        sdc_client.update(pipeline_)
        reset_pipeline_retries(pipeline_)
        logger_.info(f'Updated pipeline {pipeline_.name}')
def build(pipeline: Pipeline) -> dict:
    schema_ = {
        'version': '1',
        'name': pipeline.name,
        'dimensions': pipeline.dimension_names,
        'measurements': _get_measurements(pipeline),
        'missingDimPolicy': {
            'action': 'fill',
            'fill': 'NULL',
        },
    }
    if pipeline.dvp_config:
        schema_['dvpConfig'] = pipeline.dvp_config
    if pipeline.get_schema_id():
        schema_['id'] = pipeline.get_schema_id()
    return schema_
def _create_metric(pipeline_: Pipeline, var_binds: list) -> dict:
    metric = {
        'measurements': {},
        'schemaId': pipeline_.get_schema_id(),
        'dimensions': {},
        'tags': {},
    }
    for var_bind in var_binds:
        logger_.debug(f'Processing OID: {str(var_bind[0])}')
        if _is_value(str(var_bind[0]), pipeline_):
            measurement_name = _get_measurement_name(var_bind[0], pipeline_)
            measurement_value = _get_value(var_bind, pipeline_)
            metric['measurements'][measurement_name] = measurement_value
            logger_.debug(f'Measurement `{measurement_name}` with a value: {measurement_value}')
        elif _is_dimension(str(var_bind[0]), pipeline_):
            dimension_name = _get_dimension_name(var_bind[0], pipeline_)
            metric['dimensions'][dimension_name] = str(var_bind[1])
            logger_.debug(f'Dimension `{dimension_name}` with a value: {str(var_bind[1])}')
    if not metric['measurements'] or not metric['dimensions']:
        logger_.warning('No metrics extracted')
        return {}
    metric['timestamp'] = int(time.time())
    return metric
def _create_metrics(data: dict, pipeline_: Pipeline) -> list:
    metrics = []
    # build the field lookups once, outside the loop, to avoid recomputing them per record
    fields_dims = field.build_fields(pipeline_.dimension_configurations)
    fields_meas = field.build_fields(pipeline_.measurement_configurations)
    fields_tags = field.build_fields(pipeline_.tag_configurations)
    schema_id = pipeline_.get_schema_id()
    try:
        for obj in data:
            metric = {
                'timestamp': obj[pipeline_.timestamp_name],
                'dimensions': field.extract_fields(fields_dims, obj),
                'measurements': field.extract_fields(fields_meas, obj, True),
                'tags': {
                    name: [value]
                    for name, value in field.extract_fields(fields_tags, obj).items()
                },
                'schemaId': schema_id,
            }
            metrics.append(metric)
    except NoMeasurementException as e:
        message = f'[{pipeline_.name}] - These values were not extracted from data: {e}'
        if pipeline_.is_strict:
            raise Exception(message) from e
        logger_.warning(message)
    return metrics
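# A minimal, self-contained sketch of the tag shaping above. It assumes (as the
# code suggests) that field.extract_fields returns a flat name -> value mapping
# and that the downstream API expects each tag value wrapped in a list; the
# `extracted` dict below is hypothetical illustration data, not real output.
def _demo_tag_wrapping():
    extracted = {'region': 'us-east-1', 'team': 'data'}  # hypothetical extraction result
    tags = {name: [value] for name, value in extracted.items()}
    assert tags == {'region': ['us-east-1'], 'team': ['data']}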
def update_pipeline_watermark(pipeline_: Pipeline, timestamp: float):
    if pipeline_.watermark:
        pipeline_.watermark.timestamp = timestamp
    else:
        pipeline_.watermark = pipeline.PipelineWatermark(pipeline_.name, timestamp)
    pipeline.repository.save(pipeline_.watermark)
def reset(pipeline_: Pipeline):
    try:
        sdc_client.reset(pipeline_)
        if pipeline_.offset:
            pipeline.repository.delete_offset(pipeline_.offset)
            pipeline_.offset = None
    except sdc_client.ApiClientException as e:
        raise pipeline.PipelineException(str(e)) from e
def create(pipeline_: Pipeline, config_: dict = None):
    with pipeline.repository.SessionManager(pipeline_):
        if config_:
            _load_config(pipeline_, config_)
        extra_setup.do(pipeline_)
        if pipeline_.uses_schema():
            _update_schema(pipeline_)
        notifications.repository.create_notifications(pipeline_)
        sdc_client.create(pipeline_)
def update_pipeline_offset(pipeline_: Pipeline, timestamp: float):
    offset = sdc_client.get_pipeline_offset(pipeline_)
    if not offset:
        return
    if pipeline_.offset:
        pipeline_.offset.offset = offset
        pipeline_.offset.timestamp = timestamp
    else:
        pipeline_.offset = pipeline.PipelineOffset(pipeline_.id, offset, timestamp)
    pipeline.repository.save(pipeline_.offset)
def _get_config_loader(pipeline_: Pipeline):
    # the subclass checks must run before the uses_schema() fallback
    if isinstance(pipeline_, pipeline.TestPipeline):
        return pipeline.config.loader.TestPipelineConfigLoader
    if isinstance(pipeline_, pipeline.RawPipeline):
        return pipeline.config.loader.RawConfigLoader
    if isinstance(pipeline_, pipeline.EventsPipeline):
        return pipeline.config.loader.EventsConfigLoader
    if isinstance(pipeline_, pipeline.TopologyPipeline):
        return pipeline.config.loader.TopologyConfigLoader
    if pipeline_.uses_schema():
        return pipeline.config.loader.SchemaConfigLoader
    return pipeline.config.loader.NoSchemaConfigLoader
def get_config_handler(pipeline_: Pipeline) -> ConfigHandler:
    base_config = _get_config_loader(pipeline_).load_base_config(pipeline_)
    if isinstance(pipeline_, pipeline.TopologyPipeline):
        return _get_topology_handler(pipeline_, base_config)
    if isinstance(pipeline_, pipeline.RawPipeline):
        return _get_raw_handler(pipeline_, base_config)
    if isinstance(pipeline_, pipeline.TestPipeline):
        return _get_test_handler(pipeline_, base_config)
    if isinstance(pipeline_, pipeline.EventsPipeline):
        return _get_events_handler(pipeline_, base_config)
    if pipeline_.uses_schema():
        return _get_schema_handler(pipeline_, base_config)
    return _get_no_schema_handler(pipeline_, base_config)
def _get_tags_expressions(pipeline_: Pipeline) -> list:
    tags_expressions = [
        get_value('/tags', 'record:value("/tags") == NULL ? emptyMap() : record:value("/tags")')
    ]
    for tag_name, tag_values in pipeline_.get_tags().items():
        tags_expressions.append(get_value(f'/tags/{tag_name}', 'emptyList()'))
        tags_expressions.extend(
            get_value(f'/tags/{tag_name}[{idx}]', f'"{val}"')
            for idx, val in enumerate(tag_values)
        )
    return tags_expressions
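# A minimal sketch of what _get_tags_expressions produces, using a hypothetical
# stand-in for get_value (assumed here to pair a record path with an expression)
# and hard-coded tags instead of pipeline_.get_tags(). The returned dict shape
# is an assumption for illustration, not the real helper's output.
def _demo_tags_expressions():
    def get_value(path, expression):  # hypothetical stub
        return {'fieldToSet': path, 'expression': expression}

    tags = {'env': ['prod', 'staging']}
    expressions = [get_value('/tags', 'record:value("/tags") == NULL ? emptyMap() : record:value("/tags")')]
    for tag_name, tag_values in tags.items():
        expressions.append(get_value(f'/tags/{tag_name}', 'emptyList()'))
        expressions.extend(
            get_value(f'/tags/{tag_name}[{idx}]', f'"{val}"')
            for idx, val in enumerate(tag_values)
        )
    # the result initializes /tags, creates an empty list per tag name,
    # then fills each list element one index at a time
    assert [e['fieldToSet'] for e in expressions] == ['/tags', '/tags/env', '/tags/env[0]', '/tags/env[1]']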
def _construct(pipeline_: Pipeline) -> Pipeline:
    if not pipeline_.destination:
        # this is needed for raw pipelines
        pipeline_.destination = HttpDestination()
    return _construct_pipeline(_construct_source(pipeline_))
def increase_retry_counter(pipeline_: Pipeline):
    if not pipeline_.retries:
        pipeline_.retries = PipelineRetries(pipeline_)
    pipeline_.retries.number_of_error_statuses += 1
    pipeline.repository.save(pipeline_.retries)
def create_pipeline(pipeline_id: str, source_name: str) -> Pipeline:
    return Pipeline(
        pipeline_id,
        source.repository.get_by_name(source_name),
        destination.repository.get(),
    )
def should_send_error_notification(pipeline_: Pipeline) -> bool:
    return not constants.DISABLE_PIPELINE_ERROR_NOTIFICATIONS \
        and pipeline_.error_notification_enabled()
def _check_pipeline(self, pipeline_: Pipeline):
    assert pipeline_.uses_schema()
def _update_schema(pipeline_: Pipeline):
    new_schema = schema.build(pipeline_)
    if old_schema := pipeline_.get_schema():
        if not schema.equal(old_schema, new_schema):
            pipeline_.schema = schema.update(new_schema)
        return
def _delete_schema(pipeline_: Pipeline):
    if pipeline_.has_schema():
        schema.delete(pipeline_.get_schema_id())
        pipeline_.schema = {}
def _construct_pipeline(pipeline_: Pipeline) -> Pipeline:
    # swap the instance's class to the concrete subclass registered for its type
    pipeline_.__class__ = pipeline.TYPES[pipeline_.type]
    return pipeline_
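# A minimal, self-contained sketch of the __class__ swap used above: the base
# instance keeps all its state but is "re-typed" to a subclass looked up in a
# registry, so subclass methods become available without copying data. The
# classes and _TYPES mapping below are hypothetical, not the real pipeline module.
class _BasePipeline:
    def __init__(self, type_: str):
        self.type = type_

class _RawPipeline(_BasePipeline):
    def describe(self) -> str:
        return 'raw'

_TYPES = {'raw': _RawPipeline}

p = _BasePipeline('raw')
p.__class__ = _TYPES[p.type]  # same object, new behavior
assert isinstance(p, _RawPipeline) and p.describe() == 'raw'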