def convert_to_iterators(expr, vars, pad=0):
    """Solve every child of 'expr' and return the values as aligned lists.

    Each child is solved with solve(). A value that is repeating and is not
    a number is converted with convert_to_list(); any other value is treated
    as a scalar. Once all children are solved, scalars are repeated and
    short lists are extended with 'pad' up to the longest length seen.

    Arguments:
        expr: Expression whose 'children' are solved.
        vars: The scope handed to solve().
        pad: Filler appended to lists shorter than the longest one.

    Returns:
        A list holding one list per child of 'expr'.
    """
    solved = []
    longest = 0

    # First pass: solve each child and track the longest sequence.
    for child in expr.children:
        value = solve(child, vars).value
        is_sequence = (repeated.isrepeating(value)
                       and not number.isnumber(value))
        if is_sequence:
            value = convert_to_list(expr, value)
            longest = max(longest, len(value))
        else:
            # A scalar counts as a sequence of (at least) one element.
            longest = max(longest, 1)
        solved.append(value)

    # Second pass: bring every entry up to the common length.
    for index, entry in enumerate(solved):
        if isinstance(entry, list):
            shortfall = longest - len(entry)
            if shortfall > 0:
                # Lists are extended in place.
                entry.extend([pad] * shortfall)
        else:
            # Scalars are repeated to fill the whole length.
            solved[index] = [entry] * longest

    return solved
def __solve_for_repeated(expr, vars):
    """Helper: solve 'expr' and report whether the result is repeating.

    If solving 'expr' yields a non-empty list or tuple whose first element
    implements IStructured, the elements are melded into one value with
    repeated.meld(). This is a convenience so that users of the API don't
    have to create IRepeated objects themselves. Any other result is
    returned as it came back from solve().

    Arguments:
        expr: Expression to solve.
        vars: The scope.

    Returns:
        A (value, isrepeating) pair. For the melded list/tuple case the
        flag is always False; otherwise it is repeated.isrepeating(value).
    """
    solved = solve(expr, vars).value

    # Only the first element is inspected to decide whether this is a
    # sequence of structured objects.
    is_structured_sequence = (
        solved
        and isinstance(solved, (tuple, list))
        and protocol.implements(solved[0], structured.IStructured))

    if not is_structured_sequence:
        return solved, repeated.isrepeating(solved)

    return repeated.meld(*solved), False
def eq_implementation(self, other):
    """Element-wise equality between this value and another repeated value.

    Arguments:
        self: The iterable on the left-hand side of the comparison.
        other: The value being compared against.

    Returns:
        False if 'other' is not repeating, if the two sides differ in
        length, or if any pair of elements at the same position differs.
        True otherwise.
    """
    if not repeated.isrepeating(other):
        return False

    # Walk both sides in lockstep. A plain zip() stops at the shorter side,
    # which would make sequences of different lengths compare equal as long
    # as they share a prefix. The sentinel marks exhaustion so that a
    # length mismatch is detected without needing len() on either side.
    exhausted = object()
    mine = iter(self)
    theirs = iter(other)
    while True:
        my_item = next(mine, exhausted)
        other_item = next(theirs, exhausted)
        if my_item is exhausted or other_item is exhausted:
            # Equal only if both sides ran out at the same time.
            return my_item is other_item
        if my_item != other_item:
            return False
def hex_function(value):
    """A Function to format the output as a hex string.

    Arguments:
        value: The value to format. It may be repeating.

    Returns:
        None when 'value' compares equal to None, a list of formatted
        elements when 'value' is repeating, otherwise the "%#x" rendering
        of int(value).
    """
    # NOTE(review): this is an equality test, not an identity test.
    # Presumably None-like objects with a custom __eq__ are meant to be
    # caught here too - confirm before changing it to "is None".
    if value == None:  # noqa: E711
        return None

    if not repeated.isrepeating(value):
        return "%#x" % int(value)

    # Format each element of a repeated value recursively.
    return [hex_function(element) for element in value]
def solve_intersection(expr, vars):
    """Solve the children of 'expr' in order, stopping at the first failure.

    A child fails when its value is repeating and none of its elements is
    truthy, or when the value itself is falsy.

    Arguments:
        expr: Expression whose 'children' are solved.
        vars: The scope handed to solve().

    Returns:
        Result(False, ()) as soon as a child fails, otherwise
        Result(True, ()) after every child has been solved.
    """
    for child in expr.children:
        value = solve(child, vars).value
        all_false_repetition = (repeated.isrepeating(value)
                                and not any(value))
        if all_false_repetition or not value:
            # Remaining children are not solved.
            return Result(False, ())

    return Result(True, ())
def testCreation(self):
    """Test that creation is reasonable."""
    # Two values: the result is a repeated var and it is repeating.
    pair = repeated.repeated("foo", "bar")
    self.assertIsInstance(pair, repeated.IRepeated)
    self.assertTrue(repeated.isrepeating(pair))

    # One value: still a repeated var, but it is not repeating because it
    # does not hold more than one value.
    single = repeated.repeated("foo")
    self.assertIsInstance(single, repeated.IRepeated)
    self.assertFalse(repeated.isrepeating(single))

    # Melding a single value just hands back the scalar.
    melded_single = repeated.meld("foo")
    self.assertIsInstance(melded_single, six.string_types)

    # Melding two values behaves like repeated().
    melded_pair = repeated.meld("foo", "foo")
    self.assertIsInstance(melded_pair, repeated.IRepeated)
def scope_reflect_runtime_member(scope, name):
    """Look up 'name' in 'scope' and reflect on what is stored there.

    Arguments:
        scope: A mapping-like object indexed with 'name'.
        name: The member to look up.

    Returns:
        For a repeating member with at least one element, the Python type
        of its first element. For any other member, the member itself.
        protocol.AnyType() when a KeyError is raised.
    """
    # The whole lookup, including iteration over the member, stays inside
    # the try block, so a KeyError raised at any point here yields AnyType.
    try:
        member = scope[name]
        if not repeated.isrepeating(member):
            return member

        # For repeated values the type of the first element is used. This
        # is not great: if the first element is a None the reported type
        # will not be useful.
        for first in member:
            return type(first)

        # A repeated value with no elements falls through to here.
        return member
    except KeyError:
        return protocol.AnyType()
def lazy_filter():
    """Yield the values of 'lhs_values' that pass the filter in 'expr.rhs'.

    'lhs_values', 'expr' and 'vars' are read from the enclosing scope. For
    each value, 'expr.rhs' is solved in a scope nested around that value.
    """
    for candidate in repeated.getvalues(lhs_values):
        outcome = solve(
            expr.rhs, __nest_scope(expr.lhs, vars, candidate)).value

        if repeated.isrepeating(outcome):
            # A repeating result selects the value if any element is true.
            keep = any(outcome)
        else:
            # Todo: Implement a bool protocol - for now the python bool is
            # used, so scalar results must evaluate to true.
            keep = bool(outcome)

        if keep:
            yield candidate
def solve_membership(expr, vars):
    """Solve the membership test of 'expr.element' within 'expr.set'.

    There is an expectation that "foo" in "foobar" will be true and, at the
    same time, that "foo" in ["foobar"] will be false, mirroring the
    analogous operator in Python and other languages. That mental model
    breaks down around repeated values because EFILTER does not distinguish
    a tuple of one value from the value itself: "foo" in ("foobar") is
    true, "foo" in ("foobar", "bar") is false and "foo" in ("foo", "bar")
    is true again. These semantics are a little unfortunate, and the
    operator may be disallowed on repeated values in the future to prevent
    ambiguity.

    Arguments:
        expr: Expression providing 'element', 'set' and 'source'.
        vars: The scope.

    Returns:
        A Result whose first field says whether the needle was found.

    Raises:
        errors.EfilterError: If the needle is a repeating value.
    """
    needle = solve(expr.element, vars).value
    if repeated.isrepeating(needle):
        raise errors.EfilterError(
            root=expr.element,
            query=expr.source,
            message=("More than one value not allowed in the needle. "
                     "Got %d values.") % counted.count(needle))

    # Go through __solve_and_destructure_repeated so that row tuples are
    # handled correctly.
    haystack, isrepeating = __solve_and_destructure_repeated(expr.set, vars)

    # A non-repeated haystack is replaced by its first (singleton) value.
    # If it has no values at all it is left as it is.
    if not isrepeating:
        haystack = next(iter(haystack), haystack)

    # Strings use Python's substring semantics.
    if isinstance(haystack, six.string_types):
        return Result(needle in haystack, ())

    # Repeated values of more than one value and collections behave the
    # same. There are no proper sets in EFILTER, so this is a linear scan.
    if isrepeating or isinstance(haystack, (tuple, list)):
        found = any(straw == needle for straw in haystack)
        return Result(found, ())

    # A haystack that is not repeating but is still iterable must have
    # originated outside EFILTER, so the test is delegated to Python.
    # NOTE(review): only the first element is consulted, and this branch
    # passes None rather than () as the second Result field - confirm that
    # both are intended.
    for straw in haystack:
        return Result(needle in straw, None)

    return Result(False, ())
def _expand_kwargs(self, kwargs):
    """Split 'kwargs' into one dict per value of a repeating argument.

    Arguments are examined in iteration order. The first argument that
    expects a singleton (per self._is_arg_repeating) but carries a repeating
    value is expanded: one copy of 'kwargs' is produced per value, with that
    argument replaced by the single value. Later arguments are not examined.

    Arguments:
        kwargs: Mapping of argument name to value.

    Returns:
        A list of kwargs dicts. When nothing needs expanding this is a list
        holding the original 'kwargs' object.
    """
    for name, value in six.iteritems(kwargs):
        expects_repeated = self._is_arg_repeating(name)
        has_repeated_value = repeated.isrepeating(value)
        if expects_repeated or not has_repeated_value:
            continue

        # The arg expects a singleton and the value is repeating, so the
        # plugin is run once per value.
        expanded = []
        for single_value in value:
            variant = kwargs.copy()
            variant[name] = single_value
            expanded.append(variant)
        return expanded

    return [kwargs]
def solve_resolve(expr, vars):
    """Use IStructured.resolve to get member (rhs) from the object (lhs).

    Both scalars and repeated values are supported on the LHS. Resolving
    from a repeated value works like a map: the member is resolved on every
    value and the results are collected into a list.

    Arguments:
        expr: Expression providing 'lhs' (the object) and 'rhs' (the member).
        vars: The scope.

    Returns:
        A Result wrapping a list of resolved members for a repeated LHS, or
        the single resolved member for a scalar LHS.
    """
    target = solve(expr.lhs, vars).value
    member = solve(expr.rhs, vars).value

    if not repeated.isrepeating(target):
        return Result(structured.resolve(target, member), ())

    resolved = [structured.resolve(item, member)
                for item in repeated.getvalues(target)]
    return Result(resolved, ())
def solve_regexfilter(expr, vars):
    """A Regex filter which can operate on both strings and repeated.

    The pattern is compiled case-insensitively. Every candidate is turned
    into text before searching. For a repeated value the first matching
    item decides the outcome, so a single match selects the entire row.

    Arguments:
        expr: Expression providing 'regex' and 'string'.
        vars: The scope.

    Returns:
        A Result wrapping the first match object found, or Result(False, ())
        when nothing matches.
    """
    pattern = re.compile(solve(expr.regex, vars).value, re.I)
    subject = solve(expr.string, vars).value

    # Treat a scalar as a one-element sequence so both cases share a loop.
    candidates = subject if repeated.isrepeating(subject) else [subject]

    for candidate in candidates:
        match = pattern.search(six.text_type(str(candidate)))
        if match:
            return Result(match, ())

    return Result(False, ())
def convert_to_list(expr, repeated_list):
    """Flatten a repeated value into a plain Python list.

    A non-repeating value is wrapped in a one-element list. Otherwise None
    elements are dropped, and structured elements are replaced by the value
    of their only member.

    Arguments:
        expr: Expression the value came from; its 'source' is attached to
            the error raised for a bad subselect.
        repeated_list: The value to convert.

    Returns:
        A list of the converted elements.

    Raises:
        errors.EfilterTypeError: If a structured element does not have
            exactly one member.
    """
    if not repeated.isrepeating(repeated_list):
        return [repeated_list]

    flattened = []
    for element in repeated_list:
        if element is None:
            continue

        # The output from a select is a repeated structured (dict). With a
        # single member that member is used; anything else is rejected
        # because the query is probably bad (it should only return a single
        # column).
        if structured.isstructured(element):
            columns = structured.getmembers(element)
            column_count = len(columns)
            if column_count != 1:
                raise errors.EfilterTypeError(
                    message="Expecting a single column in subselect - "
                    "got %s columns" % column_count,
                    query=expr.source)
            element = structured.resolve(element, columns[0])

        flattened.append(element)

    return flattened
def __call__(self, *args, **kwargs):
    """Apply self._call_on_scalar across the positional arguments.

    Repeating arguments are materialized into lists. Scalar arguments are
    repeated and short lists are padded so every argument reaches the
    longest length seen (at least 1). The scalar call is then made once per
    position.

    Arguments:
        *args: Positional arguments, each either a scalar or repeating.
        **kwargs: Passed unchanged to every scalar call.

    Returns:
        A list with the result of each self._call_on_scalar call.
    """
    materialized = []
    longest = 1

    # First pass: materialize repeating arguments and find the longest.
    for arg in args:
        if repeated.isrepeating(arg):
            arg = list(arg)
            longest = max(longest, len(arg))
        materialized.append(arg)

    # Second pass: pad or expand every argument to the common length.
    # NOTE(review): 'pad' is not defined in this block. Presumably it is a
    # module-level or enclosing-scope name - confirm that it exists.
    columns = []
    for arg in materialized:
        if not isinstance(arg, list):
            arg = [arg] * longest
        else:
            arg.extend([pad] * (longest - len(arg)))
        columns.append(arg)

    # One scalar call per position across all arguments.
    return [self._call_on_scalar(row, kwargs) for row in zip(*columns)]
def _materialize_repeated_kwarg(self, kwargs):
    """Materialize the result of the args.

    This is a shim between a repeated plugin arg and the efilter stream.
    Non-repeating values are passed through unchanged. Repeating values are
    expanded into lists, handling the following cases per element:

    1. Unstructured elements are copied as they are.

    2. Structured elements (these are usually returned from a subselect)
       are unpacked when the intended column is unambiguous:
       - if the structure has a member named like the arg, that member's
         value is used;
       - otherwise, if the structure has exactly one member, that member's
         value is used.
       Any other structured element is copied as it is.

    Arguments:
        kwargs: Mapping of argument name to value.

    Returns:
        A new dict with the same keys, where every repeating value has been
        replaced by a list.
    """
    result = {}
    for k, v in six.iteritems(kwargs):
        if not repeated.isrepeating(v):
            result[k] = v
            continue

        expanded_value = []
        for item in v:
            if structured.isstructured(item):
                members = structured.getmembers(item)
                if k in members:
                    # The name emitted matches the expected arg name, so
                    # take that column. Resolving members[0] here would
                    # pick an unrelated column whenever the structure has
                    # several members.
                    expanded_value.append(structured.resolve(item, k))
                    continue
                if len(members) == 1:
                    # A single column in the subquery - just use that as
                    # the arg value.
                    expanded_value.append(
                        structured.resolve(item, members[0]))
                    continue

            expanded_value.append(item)

        result[k] = expanded_value

    return result
# This lets us get indices out of Arrays. associative.IAssociative.implement(for_type=obj.Array, implementations={ associative.select: lambda obj, key: obj[key], }) # This lets us do some_array.some_member. Useful for accessing properties. structured.IStructured.implement(for_type=obj.Array, implementations={structured.resolve: getattr}) # Pointers are only repeated if the thing they are pointing to is. repeated.isrepeating.implement( for_type=obj.Pointer, implementation=lambda x: repeated.isrepeating(x.deref())) repeated.IRepeated.implement( for_type=obj.Array, implementations={repeated.getvalues: lambda x: iter(x)}) string.IString.implement( for_type=basic.String, implementations={string.string: lambda x: utils.SmartUnicode(x)}) # Number operations on a pointer manipulate the pointer's value. number.INumber.implement(for_types=(obj.Pointer, obj.NumericProxyMixIn), implementations={ number.sum: lambda x, y: int(x) + y, number.product: lambda x, y: int(x) * y, number.difference: lambda x, y: int(x) - y,