def evaluate_across_contexts(node_set, expression_fn):
    """Evaluate ``expression_fn`` once per node and merge everything into one node set.

    Each member of ``node_set`` is handed to ``evaluate_in_context`` in turn, together
    with its one-based position and the length of ``node_set`` as the context size.
    The items produced by every evaluation are flattened into a single list, which is
    passed through ``make_node_set`` and returned.

    ``node_set`` is first checked with ``HqueryEvaluationError.must_be_node_set``.
    """
    HqueryEvaluationError.must_be_node_set(node_set)

    context_size = len(node_set)
    per_node_results = []
    for position, node in enumerate(node_set, 1):
        per_node_results.append(
            evaluate_in_context(node, expression_fn, position=position, size=context_size)
        )

    flattened = []
    for produced in per_node_results:
        for item in produced:
            flattened.append(item)
    return make_node_set(flattened)
def evaluate(self):
    """Run the mapping clause that matches each item produced by the union expression.

    The union expression is evaluated into a sequence. Every item is bound to the
    variable ``_`` inside a fresh variable scope, and the mapping generator at the
    item's ``union_index`` is invoked. The per-item results are concatenated with
    ``sequence_concat`` and the accumulated value is returned.

    Raises:
        HqueryEvaluationError: if an item has no ``union_index`` attribute, or if its
            ``union_index`` is not a valid position in ``self.mapping_generators``.
    """
    opening_msg = 'Evaluating union decomposition ({} clauses)'.format(len(self.mapping_generators))
    verbose_print(opening_msg, indent_after=True)

    union_items = make_sequence(self.union_expression())
    result = []

    for item in union_items:
        verbose_print(lambda: u'Visiting item {0}'.format(debug_dump_anything(item)), indent_after=True)

        with variable_scope():
            push_variable('_', make_sequence(item))

            if not hasattr(item, 'union_index'):
                raise HqueryEvaluationError(
                    "Union decomposition applied to something that wasn't produced by a union"
                )
            if item.union_index >= len(self.mapping_generators):
                raise HqueryEvaluationError("Decomposed union had more clauses than its mapping")

            generator = self.mapping_generators[item.union_index]
            this_result = make_sequence(generator())
            verbose_print('Mapping yielded {0} results for this visit'.format(len(this_result)))
            result = sequence_concat(result, this_result)

        # Pairs with the indent opened by the "Visiting item" message above.
        verbose_print('Visit finished', outdent_before=True)

    verbose_print('Union decomposition completed', outdent_before=True)
    return result
def class_(*args):
    """Report whether a tag carries the given class name.

    With one argument (the class name) the tag is the current context node. With two
    arguments (a node set, then the class name) the tag is the first node of the node
    set, which is first checked with ``HqueryEvaluationError.must_be_node_set``.

    Returns the ``boolean`` wrapper around the membership test. A tag that has no
    ``class`` attribute has no classes at all, so the result is false instead of an
    escaping ``KeyError``.

    Raises:
        HqueryEvaluationError: if called with anything other than one or two arguments.
    """
    argc = len(args)
    if argc == 1:
        tag = get_context_node()
        name = args[0]
    elif argc == 2:
        HqueryEvaluationError.must_be_node_set(args[0])
        tag = args[0][0]
        name = args[1]
    else:
        raise HqueryEvaluationError('class() expects one or two arguments; got {0}'.format(argc))

    try:
        classes = tag['class']
    except KeyError:
        # Subscripting a tag for an attribute it lacks raises KeyError; that means "no classes".
        classes = ()
    return boolean(name in classes)
def class_(*args):
    """Report whether a tag carries the given class name.

    With one argument (the class name) the tag is the current context node. With two
    arguments (a node set, then the class name) the tag is the first node of the node
    set, which is first checked with ``HqueryEvaluationError.must_be_node_set``.

    Returns the ``boolean`` wrapper around the membership test. A tag that has no
    ``class`` attribute has no classes at all, so the result is false instead of an
    escaping ``KeyError``.

    Raises:
        HqueryEvaluationError: if called with anything other than one or two arguments.
    """
    argc = len(args)
    if argc == 1:
        tag = get_context_node()
        name = args[0]
    elif argc == 2:
        HqueryEvaluationError.must_be_node_set(args[0])
        tag = args[0][0]
        name = args[1]
    else:
        raise HqueryEvaluationError(
            'class() expects one or two arguments; got {0}'.format(argc))

    try:
        classes = tag['class']
    except KeyError:
        # Subscripting a tag for an attribute it lacks raises KeyError; that means "no classes".
        classes = ()
    return boolean(name in classes)
def make_node_set(node_set, reverse=False):
    """Build a duplicate-free list of nodes ordered by ``hq_doc_index``.

    Args:
        node_set: a list of nodes, or a single object, which is wrapped in a list.
        reverse: when true, order by descending ``hq_doc_index``.

    Returns:
        A new list holding each distinct node object once (identity-based, via
        ``id()``), sorted by its ``hq_doc_index`` attribute.

    Raises:
        HqueryEvaluationError: if any member fails ``is_any_node``.
    """
    if not isinstance(node_set, list):
        node_set = [node_set]

    # Test each member directly rather than testing the truthiness of the first
    # offender: a falsy non-node such as '' or 0 must be rejected here too, not
    # slip through and blow up later on the ``hq_doc_index`` lookup.
    for member in node_set:
        if not is_any_node(member):
            format_str = 'Constructed node set that includes {0} object "{1}"'
            raise HqueryEvaluationError(
                format_str.format(object_type_name(member), member))

    seen_ids = set()
    unique_nodes = []
    for node in node_set:
        node_id = id(node)
        if node_id not in seen_ids:
            seen_ids.add(node_id)
            unique_nodes.append(node)

    return sorted(unique_nodes, key=lambda n: n.hq_doc_index, reverse=reverse)
def evaluate(self):
    """Assemble a ``JsonHash`` from the evaluated contents of this constructor.

    Each content item is handled by kind:

    * ``HashKeyValue`` -- stored under its key. A one-item sequence value is first
      unwrapped in place on the item. Numbers and booleans contribute their ``.value``,
      hashes and arrays their ``.contents``, and anything else its string value.
    * tag node -- delegated to ``self._process_tag``.
    * text node or string -- appended to the ``'text'`` entry via ``self._append_to_text``.

    ``self._process_filters`` is applied to the finished dictionary before wrapping.

    Raises:
        HqueryEvaluationError: for any other kind of content item.
    """
    result = dict()

    if self.contents is not None:
        contents = make_sequence(self.contents())
    else:
        contents = []

    for item in contents:
        if isinstance(item, HashKeyValue):
            if is_sequence(item.value) and len(item.value) == 1:
                item.value = item.value[0]

            value = item.value
            if is_number(value) or is_boolean(value):
                result[item.key] = value.value
            elif is_hash(value) or is_array(value):
                result[item.key] = value.contents
            else:
                result[item.key] = string_value(value)
            continue

        if is_tag_node(item):
            self._gab('adding element "{0}" to contents'.format(item.name))
            self._process_tag(result, item)
            continue

        if is_text_node(item) or is_string(item):
            self._gab('adding text "{0}" to contents'.format(debug_dump_long_string(string_value(item))))
            existing_text = result.get('text', '')
            result['text'] = self._append_to_text(existing_text, string_value(item))
            continue

        if is_any_node(item):
            value_desc = debug_dump_node(item)
        else:
            value_desc = object_type_name(item)
        raise HqueryEvaluationError(
            'Cannot use {0} as a content object in a computed JSON hash constructor'.format(value_desc)
        )

    self._process_filters(result)
    return JsonHash(result)
def evaluate_in_context(node, expression_fn, position=1, size=1, preserve_space=None):
    """Evaluate ``expression_fn`` with ``node`` pushed as the current context.

    Args:
        node: the object to use as context node; must satisfy ``is_any_node``.
        expression_fn: zero-argument callable to evaluate.
        position: forwarded to ``push_context`` as the context position.
        size: forwarded to ``push_context`` as the context size.
        preserve_space: forwarded unchanged to ``push_context``.

    Returns:
        Whatever ``expression_fn()`` returns.

    Raises:
        HqueryEvaluationError: if ``node`` is not a node.
    """
    if not is_any_node(node):
        raise HqueryEvaluationError(
            'cannot use {0} "{1}" as context node'.format(type(node), debug_dump_long_string(str(node)))
        )

    push_context(node, position, size, preserve_space)
    try:
        return expression_fn()
    finally:
        # Pop even when the expression raises, so a failed evaluation cannot leave
        # a stale context pushed for whoever handles the exception.
        pop_context()
def call_function(self, name, *args):
    """Look up a function by its query-language name and call it with ``args``.

    Hyphens in ``name`` are replaced by underscores to form the key looked up in
    ``self.all_functions``, after ``self._load_all_functions()`` has been called.

    Returns:
        The called function's return value.

    Raises:
        HqueryEvaluationError: if no function is registered under the name, or if the
            call raises a ``TypeError`` whose message looks like an argument-count
            complaint. Any other ``TypeError`` is re-raised unchanged.
    """
    self._load_all_functions()

    py_name = name.replace('-', '_')
    try:
        fn = self.all_functions[py_name]
    except KeyError:
        raise HqueryEvaluationError('Unknown function name "{0}"'.format(name))

    try:
        outcome = fn(*args)
    except TypeError as err:
        message = err.args[0]
        if not re.search(r'\d+ (?:.+ )?argument', message):
            raise
        raise HqueryEvaluationError(message)
    return outcome
def round_(*args):
    """Round a numeric value, optionally to a given number of digits.

    The first argument is the value to round; its ``.value`` attribute holds the
    number. If that number is NaN the argument itself is returned untouched. Otherwise
    the number is rounded with the built-in ``round`` -- to the digit count in the
    second argument's ``.value`` when present, else to 0 digits -- and wrapped with
    ``number``.

    Raises:
        HqueryEvaluationError: if called with no arguments.
    """
    if not args:
        raise HqueryEvaluationError('round() function requires at least one argument')

    value = args[0]
    if math.isnan(value.value):
        return value

    if len(args) < 2:
        digits = 0
    else:
        digits = args[1].value
    return number(round(value.value, digits))
def equals(first, second):
    """Compare two values for equality using the type-indexed operator table.

    The operands' ``object_type`` values select an entry of ``equality_ops_table``.
    The table is indexed with the smaller type first, so when ``first`` has the larger
    type both the lookup and the operand order are swapped. The outcome is wrapped
    with ``boolean``.

    Raises:
        HqueryEvaluationError: if the lookup, the comparison or the wrapping raises
            ``TypeError``.
    """
    first_type = object_type(first)
    second_type = object_type(second)

    try:
        if first_type > second_type:
            op = equality_ops_table[second_type][first_type]
            outcome = op(second, first)
        else:
            op = equality_ops_table[first_type][second_type]
            outcome = op(first, second)
        return boolean(outcome)
    except TypeError:
        msg = 'type mismatch comparing {0} and {1} for equality'
        raise HqueryEvaluationError(msg.format(object_type_name(first_type), object_type_name(second_type)))
def tokenize(*args):
    """Split a string on a regular expression.

    Arguments are, in order: the input (converted with ``string_value``), the pattern,
    and optionally a string of flag letters translated by ``_xpath_flags_to_re_flags``.

    Returns:
        The list produced by ``re.split``.

    Raises:
        HqueryEvaluationError: if called with fewer than 2 or more than 3 arguments.
    """
    argc = len(args)
    if argc < 2 or argc > 3:
        # The message names this function; it previously said "replace()".
        raise HqueryEvaluationError(
            'tokenize() expects 2 or 3 arguments; was passed {0}'.format(argc))

    text = string_value(args[0])
    pattern = args[1]
    if argc == 3:
        flags = _xpath_flags_to_re_flags(args[2])
    else:
        flags = 0

    return re.split(pattern, text, flags=flags)
def replace(*args):
    """Substitute regular-expression matches in a string.

    Arguments are, in order: the input (converted with ``string_value``), the pattern,
    the replacement, and optionally a string of flag letters translated by
    ``_xpath_flags_to_re_flags``.

    Returns:
        The string produced by ``re.sub``.

    Raises:
        HqueryEvaluationError: if called with fewer than 3 or more than 4 arguments.
    """
    argc = len(args)
    if not 3 <= argc <= 4:
        raise HqueryEvaluationError(
            'replace() expects 3 or 4 arguments; was passed {0}'.format(argc))

    subject = string_value(args[0])
    pattern, replacement = args[1], args[2]
    flags = _xpath_flags_to_re_flags(args[3]) if argc == 4 else 0
    return re.sub(pattern, replacement, subject, flags=flags)
def _xpath_flags_to_re_flags(flags):
    """Translate a string of flag letters into a combined ``re`` flags value.

    Recognised letters are ``i`` (IGNORECASE), ``m`` (MULTILINE), ``s`` (DOTALL) and
    ``x`` (VERBOSE). The flags for every letter in ``flags`` are OR-ed together; an
    empty ``flags`` yields 0.

    Raises:
        HqueryEvaluationError: if ``flags`` contains any other letter.
    """
    letter_to_flag = dict(
        i=re.IGNORECASE,
        m=re.MULTILINE,
        s=re.DOTALL,
        x=re.VERBOSE,
    )

    try:
        combined = 0
        for letter in flags:
            combined = combined | letter_to_flag[letter]
    except KeyError as unknown:
        raise HqueryEvaluationError(
            'Unexpected regular expression flag "{0}"'.format(unknown.args[0]))
    return combined
def evaluate(self):
    """Build an ``AttributeNode`` whose value is assembled from the evaluated contents.

    Each content item contributes a piece that is folded into the running value with
    ``self._append_to_contents``: strings, numbers and booleans contribute ``str(item)``,
    attribute nodes their ``.value``, and tag nodes their string value. When
    ``self.contents`` is ``None`` the attribute value is the empty string.

    Raises:
        HqueryEvaluationError: for any other kind of content item.
    """
    if self.contents is not None:
        contents = make_sequence(self.contents())
    else:
        contents = []

    result = ''
    for value in contents:
        if is_string(value) or is_number(value) or is_boolean(value):
            piece = str(value)
        elif is_attribute_node(value):
            piece = value.value
        elif is_tag_node(value):
            piece = string_value(value)
        else:
            if is_any_node(value):
                value_desc = debug_dump_node(value)
            else:
                value_desc = object_type_name(value)
            raise HqueryEvaluationError(
                'Cannot use {0} as a content object in a computed attribute constructor'.format(value_desc)
            )
        result = self._append_to_contents(result, piece)

    return AttributeNode(self.name, result)
def matches(*args):
    """Test whether a regular expression matches anywhere in a string.

    Accepted call shapes:

    * ``matches(pattern)`` -- the input is the string value of the context node.
    * ``matches(input, pattern)``
    * ``matches(input, pattern, flags)`` -- ``flags`` is a string of flag letters
      translated by ``_xpath_flags_to_re_flags``.

    Returns:
        The ``boolean`` wrapper around the result of ``re.search``.

    Raises:
        HqueryEvaluationError: if called with fewer than 1 or more than 3 arguments.
    """
    argc = len(args)
    if not 1 <= argc <= 3:
        raise HqueryEvaluationError(
            'matches() called with {0} arguments; expected one, two or three.'.format(argc))

    if argc == 1:
        subject = string_value(get_context_node())
        pattern = args[0]
    else:
        subject = string_value(args[0])
        pattern = args[1]

    flags = _xpath_flags_to_re_flags(args[2]) if argc == 3 else 0
    return boolean(re.search(pattern, subject, flags))
def evaluate(self):
    """Build a new element named ``self.name`` and fill it from the evaluated contents.

    An empty element is parsed with BeautifulSoup's ``html.parser`` and then populated:
    tag nodes are cloned with ``self._clone_tag`` and appended as children, attribute
    nodes become attributes of the new element, and strings, numbers and booleans are
    appended as ``str(item)``.

    Returns:
        The new element passed through ``make_node_set``.

    Raises:
        HqueryEvaluationError: for any other kind of content item.
    """
    soup = BeautifulSoup('<{0}></{0}>'.format(self.name), 'html.parser')
    # Look the element up with find() rather than attribute access: for a name that
    # collides with one of the soup object's own attributes (e.g. "name" or
    # "contents"), getattr() returns that attribute instead of the new tag.
    result = soup.find(self.name)

    if self.contents is not None:
        contents = make_sequence(self.contents())
    else:
        contents = []

    for value in contents:
        if is_tag_node(value):
            result.append(self._clone_tag(value))
        elif is_attribute_node(value):
            result[value.name] = value.value
        elif is_string(value) or is_number(value) or is_boolean(value):
            result.append(str(value))
        else:
            value_desc = debug_dump_node(value) if is_any_node(value) else object_type_name(value)
            raise HqueryEvaluationError(
                'Cannot use {0} as a content object in a computed element constructor'.format(value_desc)
            )

    return make_node_set(result)
def _make_array_item(self, value):
    """Convert one evaluated content item into the value stored in a JSON array.

    Tag nodes, text nodes and strings become their string value; booleans and numbers
    contribute their ``.value``; hashes contribute their ``.contents``. Each accepted
    item is also reported through ``self._gab``.

    Raises:
        HqueryEvaluationError: for any other kind of item.
    """
    if is_tag_node(value):
        self._gab(
            lambda: 'appending text contents of element "{0}" to array'.format(debug_dump_anything(value))
        )
        return string_value(value)

    if is_text_node(value) or is_string(value):
        # Rebind before logging so the message shows the converted string.
        value = string_value(value)
        self._gab(lambda: u'appending text "{0}" to array'.format(debug_dump_anything(value)))
        return value

    if is_boolean(value) or is_number(value):
        self._gab(lambda: 'appending {0} to array'.format(debug_dump_anything(value)))
        return value.value

    if is_hash(value):
        self._gab(lambda: u'appending JSON {0} to array'.format(debug_dump_anything(value)))
        return value.contents

    raise HqueryEvaluationError(
        "Can't use {0} as contents in a computed JSON array constructor".format(debug_dump_anything(value))
    )
def __init__(self, contents):
    """Store ``contents`` on the instance; it must be a ``list``.

    Raises:
        HqueryEvaluationError: if ``contents`` is not a list. The message names the
            class of the offending object.
    """
    if isinstance(contents, list):
        self.contents = contents
        return

    offending_type = contents.__class__.__name__
    raise HqueryEvaluationError(
        'Attempted to construct a JSON array based on a(n) {0} object'.format(offending_type))
def count(sequence):
    """Return the length of ``sequence`` wrapped with ``number``.

    The argument is first checked with
    ``HqueryEvaluationError.must_be_node_set_or_sequence``.
    """
    HqueryEvaluationError.must_be_node_set_or_sequence(sequence)
    item_total = len(sequence)
    return number(item_total)