def test_json_to_cel():
    """
    GIVEN a parsed JSON document
    WHEN it is passed to json_to_cel()
    THEN every Python value is replaced by the expected CEL value.
    """
    source_doc = [
        {"bool": True},
        {"numbers": [2.71828, 42]},
        {"null": None},
        {"string": 'embedded "quote"'},
    ]

    actual = celpy.json_to_cel(source_doc)

    # One expected mapping per entry of the source document, in the same order.
    bool_entry = celtypes.MapType(
        {celtypes.StringType("bool"): celtypes.BoolType(True)}
    )
    numbers_entry = celtypes.MapType(
        {
            celtypes.StringType("numbers"): celtypes.ListType(
                [celtypes.DoubleType(2.71828), celtypes.IntType(42)]
            ),
        }
    )
    null_entry = celtypes.MapType({celtypes.StringType("null"): None})
    string_entry = celtypes.MapType(
        {
            celtypes.StringType("string"): celtypes.StringType('embedded "quote"'),
        }
    )
    expected = celtypes.ListType(
        [bool_entry, numbers_entry, null_entry, string_entry]
    )

    assert actual == expected
def test_main_pipe(mock_cel_environment, caplog, capsys):
    """
    GIVEN a JSON document on stdin AND an expression
    WHEN ``main()`` evaluates it
    THEN the environment, compile, program and evaluate calls are the expected ones.

    ``mock_cel_environment`` is a fixture defined outside this block; the
    assertions below treat it as a mock of the environment class whose
    ``compile()`` result is ``sentinel.AST``.
    """
    argv = ['"Hello world! I\'m " + name + "."']

    # Swap stdin for the document and always put the previous object back,
    # even when main() raises, so a failure here cannot leak into later tests.
    previous_stdin = sys.stdin
    sys.stdin = io.StringIO('{"name": "CEL"}\n')
    try:
        status = celpy.__main__.main(argv)
    finally:
        sys.stdin = previous_stdin

    assert status == 0
    assert mock_cel_environment.mock_calls == [
        call(package="jq", annotations=None)
    ]

    env = mock_cel_environment.return_value
    assert env.compile.mock_calls == [
        call('"Hello world! I\'m " + name + "."')
    ]
    assert env.program.mock_calls == [call(sentinel.AST)]

    # The stdin document is bound to the "jq" package name as a CEL map.
    prgm = env.program.return_value
    assert prgm.evaluate.mock_calls == [
        call({
            "jq": celtypes.MapType(
                {celtypes.StringType("name"): celtypes.StringType("CEL")})
        })
    ]

    assert caplog.messages == []
    out, err = capsys.readouterr()
    assert out == '"sentinel.OUTPUT"\n'
    assert err == ""
def test_main_slurp_bool_status(mock_cel_environment_bool, caplog, capsys):
    """
    GIVEN JSON AND slurp option AND boolean-status option AND boolean expr
    WHEN eval
    THEN correct internal object use AND the exit status reflects the result.

    The expression compares ``.name`` with ``"not CEL"``; the test expects
    ``false`` on stdout and an exit status of 1.
    ``mock_cel_environment_bool`` is a fixture defined outside this block.
    """
    argv = ["-s", "-b", '.name == "not CEL"']

    # Swap stdin for the document and always put the previous object back,
    # even when main() raises, so a failure here cannot leak into later tests.
    previous_stdin = sys.stdin
    sys.stdin = io.StringIO('{"name": "CEL"}\n')
    try:
        status = celpy.__main__.main(argv)
    finally:
        sys.stdin = previous_stdin

    assert status == 1
    assert mock_cel_environment_bool.mock_calls == [
        call(package='jq', annotations=None)
    ]

    env = mock_cel_environment_bool.return_value
    assert env.compile.mock_calls == [call('.name == "not CEL"')]
    assert env.program.mock_calls == [call(sentinel.AST)]

    # The stdin document is bound to the "jq" package name as a CEL map.
    prgm = env.program.return_value
    assert prgm.evaluate.mock_calls == [
        call({
            'jq': celtypes.MapType(
                {celtypes.StringType('name'): celtypes.StringType('CEL')})
        })
    ]

    assert caplog.messages == []
    out, err = capsys.readouterr()
    assert out == "false\n"
    assert err == ""
def marked_key(
    source: celtypes.ListType,
    target: celtypes.StringType,
) -> celtypes.Value:
    """
    Look up ``target`` in a list of ``{"Key": text, "Value": text}`` mappings
    and decode its value as a ``message:action@action_date`` marker.

    The text is split on the *last* ``:`` and the remainder on the first ``@``,
    producing the mapping::

        {"message": message, "action": action, "action_date": action_date}

    where ``action_date`` is a CEL timestamp.

    The result is null (``None``) when the key is absent, has no value, or the
    value cannot be split into the three parts.
    """
    marker_text = key(source, target)
    if marker_text is None:
        return None

    try:
        message, operation = cast(celtypes.StringType, marker_text).rsplit(':', 1)
        action, action_date_text = operation.strip().split('@', 1)
    except ValueError:
        # A missing ':' or '@' makes the tuple unpacking fail.
        return None

    decoded = {
        celtypes.StringType("message"): celtypes.StringType(message),
        celtypes.StringType("action"): celtypes.StringType(action),
        celtypes.StringType("action_date"): celtypes.TimestampType(action_date_text),
    }
    return celtypes.MapType(decoded)
def get_metrics(
    resource: celtypes.MapType,
    request: celtypes.MapType,
) -> celtypes.Value:
    """
    Make a statistics request for ``resource`` through the current C7N filter.

    The module-global ``C7N`` namespace supplies the filter. A request mapping
    is assembled here and handed to :func:`get_raw_metrics`; the value of the
    requested statistic is then picked out of every item that comes back.

    The ``request`` parameter is a mapping with the keys ``MetricName``,
    ``Statistic``, ``StartTime``, ``EndTime`` and ``Period``:

    ::

        Resource.get_metrics({"MetricName": "CPUUtilization", "Statistic": "Average",
            "StartTime": Now - duration("4d"), "EndTime": Now, "Period": duration("86400s")}
            ).exists(m, m < 30)

    The namespace is taken from ``C7N.filter.manager.resource_type``.
    The dimension name is taken from ``C7N.filter.manager.get_model().dimension``
    and its value from the resource itself.

    ..  todo:: Refactor C7N

        Provide a :py:class:`MetricsAccess` mixin in a :py:class:`CELFilter` class.
        We want to have the metrics processing in the new :py:class:`CELFilter` instance.

    :param resource: The resource being examined.
    :param request: The metric request described above.
    :return: A CEL list with one statistic value per raw metric item.
    """
    dimension_name = celtypes.StringType(C7N.filter.manager.get_model().dimension)
    metric_namespace = celtypes.StringType(C7N.filter.manager.resource_type)
    # TODO: Varies by resource/policy type. Each policy's model may have different dimensions.
    metric_dimensions = json_to_cel([
        {'Name': dimension_name, 'Value': resource.get(dimension_name)},
    ])

    statistics_request = celtypes.MapType({
        celtypes.StringType("Namespace"): metric_namespace,
        celtypes.StringType("MetricName"): request["MetricName"],
        celtypes.StringType("Dimensions"): metric_dimensions,
        celtypes.StringType("Statistics"): [request["Statistic"]],
        celtypes.StringType("StartTime"): request["StartTime"],
        celtypes.StringType("EndTime"): request["EndTime"],
        celtypes.StringType("Period"): request["Period"],
    })
    raw_metrics = cast(celtypes.ListType, get_raw_metrics(statistics_request))

    # Keep only the requested statistic from each returned item.
    statistic_values = []
    for item in raw_metrics:
        statistic_values.append(
            cast(celtypes.MapType, item).get(request["Statistic"])
        )
    return celtypes.ListType(statistic_values)
def json_to_cel(document: JSON) -> celtypes.Value:
    """Translate a parsed JSON value into the matching CEL value, recursively.

    Strings are always converted to ``StringType``: there is no reliable way to
    tell which strings were meant to be timestamps or durations.

    ::

        >>> from pprint import pprint
        >>> from celpy.adapter import json_to_cel
        >>> doc = json.loads('["str", 42, 3.14, null, true, {"hello": "world"}]')
        >>> cel = json_to_cel(doc)
        >>> pprint(cel)
        ListType([StringType('str'), IntType(42), DoubleType(3.14), None, BoolType(True), \
MapType({StringType('hello'): StringType('world')})])

    :param document: A value built from ``bool``, ``float``, ``int``, ``str``,
        ``None``, ``list`` and ``dict``.
    :return: The equivalent CEL value; ``None`` stays ``None``.
    :raises ValueError: If a value of any other type is encountered.
    """
    # bool is tested ahead of int because bool is a subclass of int.
    if isinstance(document, bool):
        return celtypes.BoolType(document)
    if isinstance(document, float):
        return celtypes.DoubleType(document)
    if isinstance(document, int):
        return celtypes.IntType(document)
    if isinstance(document, str):
        return celtypes.StringType(document)
    if document is None:
        return None
    if isinstance(document, list):
        return celtypes.ListType([json_to_cel(member) for member in document])
    if isinstance(document, dict):
        converted = {
            json_to_cel(name): json_to_cel(member)
            for name, member in document.items()
        }
        return celtypes.MapType(converted)
    raise ValueError(
        f"unexpected type {type(document)} in JSON structure {document!r}")
def image(resource: celtypes.MapType) -> celtypes.Value:
    """
    Fetch the image details for this resource from the current C7N filter.

    Only the creation date is exposed, converted to a CEL timestamp.
    When the filter has no image for the resource, a fixed placeholder date of
    ``2000-01-01T01:01:01.000Z`` is used instead.

    A more general version could run the whole image object through
    ``json_to_cel()``. The following may be usable, but it seems too complex:

    ::

        C7N.filter.prefetch_instance_images(C7N.policy.resources)
        image = C7N.filter.get_instance_image(resource["ImageId"])
        return json_to_cel(image)

    ..  todo:: Refactor C7N

        Provide the :py:class:`InstanceImageBase` mixin in a :py:class:`CELFilter` class.
        We want to have the image details in the new :py:class:`CELFilter` instance.

    :param resource: The resource being examined.
    :return: A CEL map with the single key ``CreationDate``.
    """
    # Assumes the :py:class:`CELFilter` class has this method, extracted from the legacy filter.
    # Requires that the policy already called C7N.filter.prefetch_instance_images([resource])
    # to populate the cache.
    image_details = C7N.filter.get_instance_image(resource)
    creation_date = (
        image_details['CreationDate'] if image_details
        else "2000-01-01T01:01:01.000Z"
    )
    return celtypes.MapType({
        celtypes.StringType("CreationDate"): celtypes.TimestampType(creation_date),
    })
def value_from(
    url: celtypes.StringType,
    format: Optional[celtypes.StringType] = None,
) -> celtypes.Value:
    """
    Read values from a URL.

    First, do :func:`text_from` to read the source.
    Then, do :func:`parse_text` to parse the source, if needed.

    This makes the format optional, and deduces it from the URL's path information.
    The suffix of the whole URL is tried first. If that is not a supported
    format (for example because a query string or fragment follows the file
    name, as in ``.../data.json?versionId=1``), the suffix of the URL's path
    component alone is tried.

    C7N will generally replace this with a function
    that leverages a more sophisticated :class:`c7n.resolver.ValuesFrom`.

    :param url: Location of the data.
    :param format: One of ``json``, ``ndjson``, ``ldjson``, ``jsonl``, ``txt``,
        ``csv``, ``csv2dict``. Deduced from ``url`` when omitted or empty.
    :return: The parsed content as a CEL value.
    :raises ValueError: If the given or deduced format is not supported.
    """
    # Imported here so the block does not depend on a new module-level import.
    from urllib.parse import urlparse

    supported_formats = ('json', 'ndjson', 'ldjson', 'jsonl', 'txt', 'csv', 'csv2dict')

    # 1. get format either from arg or URL
    if not format:
        _, suffix = os.path.splitext(url)
        if suffix[1:] not in supported_formats:
            # A query string or fragment hides the real extension; look at the
            # path component only. Used solely when the whole-URL suffix fails,
            # so every URL that already worked resolves exactly as before.
            _, path_suffix = os.path.splitext(urlparse(url).path)
            if path_suffix[1:] in supported_formats:
                suffix = path_suffix
        format = celtypes.StringType(suffix[1:])
    if format not in supported_formats:
        raise ValueError(f"Unsupported format: {format!r}")

    # 2. read raw data
    # Note this is directly bound to text_from() and does not go though the environment
    # or other CEL indirection.
    raw_data = cast(celtypes.StringType, text_from(url))

    # 3. parse physical format (json, ldjson, ndjson, jsonl, txt, csv, csv2dict)
    return parse_text(raw_data, format)
def arg_type_value(text: str) -> Tuple[str, Annotation, celtypes.Value]:
    """
    Decompose ``-a name:type=value`` argument into a useful triple.

    Also accept ``-a name:type``. This will find ``name`` in the environment and convert to the
    requested type.

    Also accepts ``-a name``. This will find ``name`` in the environment and treat it as a string.
    When ``name`` is not set in the environment, the value is the empty string.

    Currently, names do not reflect package naming. An environment can be a package,
    and the activation can include variables that are also part of the package.
    This is not supported via the CLI.

    Types can be celtypes class names or TYPE_NAME or PROTOBUF_TYPE

    ::

        TYPE_NAME : "int64_value" | "null_value" | "uint64_value" | "double_value"
        | "bool_value" | "string_value" | "bytes_value" | "number_value"

        PROTOBUF_TYPE : "single_int64" | "single_int32" | "single_uint64" | "single_uint32"
        | "single_sint64" | "single_sint32" | "single_fixed64" | "single_fixed32"
        | "single_sfixed32" | "single_sfixed64" | "single_float" | "single_double"
        | "single_bool" | "single_string" | "single_bytes"
        | "single_duration" | "single_timestamp"

    ..  todo:: type names can include `.` to support namespacing for protobuf support.

    :param text: Argument value
    :return: Tuple with name, annotation, and resulting object.
    :raises argparse.ArgumentTypeError: If ``text`` does not match the expected
        pattern, the type name is not in ``CLI_ARG_TYPES``, or the conversion
        raises ``ValueError``.
    """
    arg_pattern = re.compile(
        r"^([_a-zA-Z][_a-zA-Z0-9]*)(?::([_a-zA-Z][_a-zA-Z0-9]*))?(?:=(.*))?$")
    match = arg_pattern.match(text)
    if match is None:
        raise argparse.ArgumentTypeError(
            f"arg {text} not 'var=string', 'var:type=value', or `var:type")
    name, type_name, value_text = match.groups()
    if value_text is None:
        # No "=value" part: fall back to the environment (may still be None).
        value_text = os.environ.get(name)

    type_definition: Annotation  # CELType or a conversion function
    value: celtypes.Value  # Specific value.
    if type_name:
        try:
            type_definition = CLI_ARG_TYPES[type_name]
            value = cast(
                celtypes.Value,
                type_definition(value_text)  # type: ignore[arg-type, call-arg]
            )
        except KeyError as ex:
            raise argparse.ArgumentTypeError(
                f"arg {text} type name not in {list(CLI_ARG_TYPES)}") from ex
        except ValueError as ex:
            raise argparse.ArgumentTypeError(
                f"arg {text} value invalid for the supplied type") from ex
    else:
        # An unset environment variable leaves value_text as None; use an
        # empty string rather than handing None to StringType.
        value = celtypes.StringType("" if value_text is None else value_text)
        type_definition = celtypes.StringType
    return name, type_definition, value
def text_from(
    url: celtypes.StringType,
) -> celtypes.Value:
    """
    Fetch the text behind a URL.

    The request advertises ``Accept-Encoding: gzip``; a response whose
    ``Content-Encoding`` header is ``gzip`` is decompressed before it is
    decoded as UTF-8.

    This can be expanded to accept S3 or other URL's.

    :param url: Location to read.
    :return: The response body as a CEL string.
    """
    request = urllib.request.Request(url, headers={"Accept-Encoding": "gzip"})
    text: str
    with closing(urllib.request.urlopen(request)) as response:
        is_gzipped = response.info().get('Content-Encoding') == 'gzip'
        payload = response.read()
        if is_gzipped:
            # MAX_WBITS | 32 lets zlib auto-detect the gzip/zlib header.
            text = zlib.decompress(payload, zlib.MAX_WBITS | 32).decode('utf8')
        else:
            text = payload.decode('utf-8')
    return celtypes.StringType(text)
def key(
    source: celtypes.ListType,
    target: celtypes.StringType,
) -> celtypes.Value:
    """
    Find the ``"Value"`` that belongs to a given ``"Key"`` in a list of
    ``{"Key": x, "Value": y}`` items.

    The C7N shorthand ``tag:Name`` doesn't translate well to CEL: it pulls a
    single ``y`` out of such a list, namely the one whose ``x`` is ``"Name"``.
    This function does that lookup for any ``target`` and returns the ``y`` of
    the first matching item, or null when nothing matches.

    In effect, the ``key()`` function::

        Resource["Tags"].key("Name")

    is somewhat like::

        Resource["Tags"].filter(x, x["Key"] == "Name")[0]["Value"]

    except that ``key()`` returns None instead of raising an exception when
    the key is not found.

    We might want to generalize this into a ``first()`` reduction macro.
    ``Resource["Tags"].first(x, x["Key"] == "Name" ? x["Value"] : null, null)``
    This macro returns the first non-null value or the default (which can be ``null``.)

    :param source: The list of ``{"Key": x, "Value": y}`` items.
    :param target: The ``"Key"`` to look for.
    :return: The matching item's ``"Value"``, or ``None``.
    """
    key_field = celtypes.StringType("Key")
    value_field = celtypes.StringType("Value")

    for item in source:
        item_key = cast(
            celtypes.StringType, cast(celtypes.MapType, item).get(key_field)
        )
        if item_key == target:
            # Only the first match counts.
            return cast(celtypes.MapType, item).get(value_field)
    return None
def test_encoder():
    """
    GIVEN a CEL map holding one value of each supported kind
    WHEN serialized with json.dumps() using CELJSONEncoder
    THEN the expected JSON text is produced.
    """
    numbers = celtypes.ListType(
        [celtypes.DoubleType(2.71828), celtypes.UintType(42)]
    )
    # Insertion order matters: it is the key order of the JSON text below.
    cel_obj = celtypes.MapType({
        celtypes.StringType("bool"): celtypes.BoolType(True),
        celtypes.StringType("numbers"): numbers,
        celtypes.StringType("null"): None,
        celtypes.StringType("string"): celtypes.StringType('embedded "quote"'),
        celtypes.StringType("bytes"): celtypes.BytesType(b"bytes"),
        celtypes.StringType("timestamp"): celtypes.TimestampType('2009-02-13T23:31:30Z'),
        celtypes.StringType("duration"): celtypes.DurationType('42s'),
    })

    expected_json = (
        '{"bool": true, "numbers": [2.71828, 42], "null": null, '
        '"string": "embedded \\"quote\\"", "bytes": "Ynl0ZXM=", '
        '"timestamp": "2009-02-13T23:31:30Z", "duration": "42s"}'
    )

    json_text = json.dumps(cel_obj, cls=celpy.CELJSONEncoder)
    assert json_text == expected_json
def parse_text(
    source_text: celtypes.StringType,
    format: celtypes.StringType,
) -> celtypes.Value:
    """
    Turn raw text into CEL values according to ``format``.

    - ``json``: the whole text is one JSON document.
    - ``txt``: a list of lines, each with trailing whitespace removed.
    - ``ldjson``, ``ndjson``, ``jsonl``: a list with one JSON document per line.
    - ``csv``: a list of rows, each a list of strings.
    - ``csv2dict``: a list of rows, each a mapping keyed by the header row.

    :param source_text: The text to parse.
    :param format: One of the format names above.
    :return: The parsed content as a CEL value.
    :raises ValueError: If ``format`` is not one of the names above.
    """
    if format == "json":
        return json_to_cel(json.loads(source_text))

    if format == "txt":
        stripped_lines = [
            celtypes.StringType(line.rstrip())
            for line in source_text.splitlines()
        ]
        return celtypes.ListType(stripped_lines)

    if format in ("ldjson", "ndjson", "jsonl"):
        documents = [
            json_to_cel(json.loads(line)) for line in source_text.splitlines()
        ]
        return celtypes.ListType(documents)

    if format == "csv":
        rows = csv.reader(io.StringIO(source_text))
        return celtypes.ListType([json_to_cel(row) for row in rows])

    if format == "csv2dict":
        records = csv.DictReader(io.StringIO(source_text))
        return celtypes.ListType([json_to_cel(record) for record in records])

    raise ValueError(f"Unsupported format: {format!r}")  # pragma: no cover
def test_decoder():
    """
    GIVEN JSON text
    WHEN parsed with json.loads() using CELJSONDecoder
    THEN a CEL map is built; bytes, timestamp and duration text stay strings.
    """
    json_text = (
        '{"bool": 1, "numbers": [2.71828, 42], "null": null, '
        '"string": "embedded \\"quote\\"", "bytes": "Ynl0ZXM=", '
        '"timestamp": "2009-02-13T23:31:30Z", "duration": "42s"}'
    )

    numbers = celtypes.ListType(
        [celtypes.DoubleType(2.71828), celtypes.IntType(42)]
    )
    expected = celtypes.MapType({
        celtypes.StringType('bool'): celtypes.IntType(1),
        celtypes.StringType('bytes'): celtypes.StringType('Ynl0ZXM='),
        celtypes.StringType('duration'): celtypes.StringType('42s'),
        celtypes.StringType('null'): None,
        celtypes.StringType('numbers'): numbers,
        celtypes.StringType('string'): celtypes.StringType('embedded "quote"'),
        celtypes.StringType('timestamp'): celtypes.StringType('2009-02-13T23:31:30Z'),
    })

    cel_obj = json.loads(json_text, cls=celpy.CELJSONDecoder)
    assert cel_obj == expected
def normalize(string: celtypes.StringType) -> celtypes.StringType:
    """
    Normalize a string: lower-case it, then remove leading and trailing whitespace.

    :param string: The text to normalize.
    :return: The normalized text as a CEL string.
    """
    lowered = string.lower()
    trimmed = lowered.strip()
    return celtypes.StringType(trimmed)