def transform_for_bc(pipeline_: Pipeline) -> dict:
    """Transform a pipeline into the payload format expected by the BC API.

    Args:
        pipeline_: the pipeline to serialize.

    Returns:
        A dict with identity, scheduling, progress, schema and config data.

    Note:
        The original implementation popped 'interval'/'delay' directly from
        ``pipeline_.config``, destructively mutating the pipeline's own config
        on every call. We now pop from a shallow copy instead.
    """
    # Copy so removing the scheduling keys below doesn't mutate the
    # pipeline's own config dict.
    config = dict(pipeline_.config)
    # scheduling keys are sent in the dedicated 'scheduling' section
    config.pop('interval', None)
    config.pop('delay', None)
    return {
        'pipeline_id': pipeline_.name,
        'created': int(pipeline_.created_at.timestamp()),
        'updated': int(pipeline_.last_edited.timestamp()),
        'status': pipeline_.status,
        'schemaId': pipeline_.get_schema_id(),
        'source': {
            'name': pipeline_.source.name,
            'type': pipeline_.source.type,
        },
        'scheduling': {
            'interval': pipeline_.interval,
            'delay': pipeline_.delay,
        },
        'progress': {
            # empty string when no offset has been recorded yet
            'last_offset': pipeline_.offset.offset if pipeline_.offset else '',
        },
        # we need to always send schema even if the pipeline doesn't use it
        'schema': pipeline_.get_schema() if pipeline_.get_schema_id() else schema.build(pipeline_),
        'config': config,
    }
def _create_metric(pipeline_: Pipeline, var_binds: list) -> dict:
    """Assemble a single metric payload from SNMP var-binds.

    Each var-bind is classified as a measurement or a dimension via the
    pipeline configuration; anything else is ignored.

    Returns:
        The metric dict with a current timestamp, or an empty dict (after
        logging a warning) when no measurements or no dimensions were found.
    """
    schema_id = pipeline_.get_schema_id()
    measurements: dict = {}
    dimensions: dict = {}
    for var_bind in var_binds:
        oid = var_bind[0]
        oid_str = str(oid)
        logger_.debug(f'Processing OID: {oid_str}')
        if _is_value(oid_str, pipeline_):
            name = _get_measurement_name(oid, pipeline_)
            value = _get_value(var_bind, pipeline_)
            measurements[name] = value
            logger_.debug(
                f'Measurement `{name}` with a value: {value}'
            )
        elif _is_dimension(oid_str, pipeline_):
            name = _get_dimension_name(oid, pipeline_)
            dimensions[name] = str(var_bind[1])
            logger_.debug(
                f'Dimension `{name}` with a value: {str(var_bind[1])}'
            )
    # a metric without both measurements and dimensions is useless
    if not measurements or not dimensions:
        logger_.warning('No metrics extracted')
        return {}
    return {
        'measurements': measurements,
        'schemaId': schema_id,
        'dimensions': dimensions,
        'tags': {},
        'timestamp': int(time.time()),
    }
def build(pipeline: Pipeline) -> dict:
    """Build the schema payload describing a pipeline.

    Includes dvpConfig and the existing schema id only when present.
    """
    result = {
        'version': '1',
        'name': pipeline.name,
        'dimensions': pipeline.dimension_names,
        'measurements': _get_measurements(pipeline),
        # missing dimensions are filled with the literal string 'NULL'
        'missingDimPolicy': {'action': 'fill', 'fill': 'NULL'},
    }
    if pipeline.dvp_config:
        result['dvpConfig'] = pipeline.dvp_config
    schema_id = pipeline.get_schema_id()
    if schema_id:
        result['id'] = schema_id
    return result
def _create_metrics(data: dict, pipeline_: Pipeline) -> list:
    """Convert raw source records into a list of metric payloads.

    Raises:
        Exception: when a measurement cannot be extracted and the pipeline
            is strict; otherwise the problem is only logged as a warning.
    """
    # these values must be outside the for loop for optimization purposes
    dim_fields = field.build_fields(pipeline_.dimension_configurations)
    meas_fields = field.build_fields(pipeline_.measurement_configurations)
    tag_fields = field.build_fields(pipeline_.tag_configurations)
    schema_id = pipeline_.get_schema_id()

    metrics = []
    try:
        for record in data:
            metrics.append({
                "timestamp": record[pipeline_.timestamp_name],
                "dimensions": field.extract_fields(dim_fields, record),
                "measurements": field.extract_fields(meas_fields, record, True),
                # tag values are sent as single-element lists
                "tags": {
                    name: [value]
                    for name, value in field.extract_fields(tag_fields, record).items()
                },
                "schemaId": schema_id,
            })
    except NoMeasurementException as e:
        message = f'[{pipeline_.name}] - These values were not extracted from data: {e}'
        if pipeline_.is_strict:
            raise Exception(message) from e
        logger_.warning(message)
    return metrics
def _delete_schema(pipeline_: Pipeline):
    """Delete the pipeline's schema remotely and clear the local reference."""
    if not pipeline_.has_schema():
        return
    schema.delete(pipeline_.get_schema_id())
    pipeline_.schema = {}