def test_selector_splitting(self):
    """Each three-tag query, whatever mix of combinators, yields three parts."""
    queries = ("div > span > a", "div span > a", "div span a", "div > span a")
    # Parse every query first, then check the part counts in the same order.
    parsed_selectors = [Selector(query) for query in queries]
    for parsed in parsed_selectors:
        self.assertEqual(len(parsed.parts), 3)
def filter_element(step: ActionStep, prepend: str = "", index=0) -> Tuple[str, Dict, int]:
    """Build the element filter clause for a single action step.

    Args:
        step: The action step whose ``selector``, ``tag_name``, ``text`` and
            ``href`` fields (read via ``model_to_dict``) drive the filter.
        prepend: Prefix used when naming query parameters.
        index: Running index; passed to ``filter_event`` when the step has a
            URL, otherwise incremented locally.

    Returns:
        A tuple ``(clause, params, index)`` where ``clause`` is
        ``ELEMENT_ACTION_FILTER`` with its placeholders filled in, ``params``
        holds the values to bind, and ``index`` is the running index advanced
        once more.
    """
    if step.url:
        event_filter, params, index = filter_event(step, prepend, index)
    else:
        event_filter, params, index = "", {}, index + 1

    filters = model_to_dict(step)
    prop_queries = []

    selector_text = filters.get("selector")
    if selector_text:
        prop_queries.extend(
            part.clickhouse_query(query=ELEMENT_PROP_FILTER) for part in Selector(selector_text).parts
        )

    for key in ("tag_name", "text", "href"):
        value = filters.get(key)
        if value:
            # Bind the value as a query parameter rather than splicing it into
            # the SQL text, so quotes in user-supplied values cannot break out
            # of the string literal.
            param_name = "{}_{}_element_{}".format(prepend, index, key)
            params[param_name] = value
            prop_queries.append("{} = %({})s".format(key, param_name))

    return (
        ELEMENT_ACTION_FILTER.format(
            element_filter=" AND ".join(prop_queries),
            event_filter="AND uuid IN {}".format(event_filter) if event_filter else "",
        ),
        params,
        index + 1,
    )
def test_selector_attribute_with_spaces(self):
    """A padded attribute selector whose value holds a space and ``]`` parses to one part."""
    parsed = Selector(' [data-id="foo bar]"] ')
    expectations = (
        ("data", {"attributes__attr__data-id": "foo bar]"}),
        ("direct_descendant", False),
        ("unique_order", 0),
    )
    for attribute, expected in expectations:
        self.assertEqual(getattr(parsed.parts[0], attribute), expected)
def test_asterisk_in_query(self):
    """A trailing ``*`` is expected to be dropped, leaving only the ``div`` part."""
    # Sometimes people randomly add * but they don't do very much, so just remove them
    parsed = Selector("div > *")
    expectations = (
        ("data", {"tag_name": "div"}),
        ("direct_descendant", False),
        ("unique_order", 0),
    )
    for attribute, expected in expectations:
        self.assertEqual(getattr(parsed.parts[0], attribute), expected)
    self.assertEqual(len(parsed.parts), 1)
def filter_element(filters: Dict, prepend: str = "") -> Tuple[List[str], Dict]:
    """Translate element filters into ``match(elements_chain, ...)`` conditions.

    Args:
        filters: Mapping that may contain ``selector``, ``tag_name``, ``href``
            and ``text`` entries; falsy entries are ignored.
        prepend: Prefix added to every generated parameter name.

    Returns:
        ``(conditions, params)``: the list of SQL condition strings and the
        regex parameters they reference.
    """
    params = {}
    conditions = []

    def add_condition(suffix, pattern):
        # Register the regex under its prefixed name and reference it in SQL.
        name = "{}{}".format(prepend, suffix)
        params[name] = pattern
        conditions.append("match(elements_chain, %({})s)".format(name))

    if filters.get("selector"):
        parsed = Selector(filters["selector"], escape_slashes=False)
        add_condition("selector_regex", _create_regex(parsed))

    if filters.get("tag_name"):
        add_condition("tag_name_regex", r"(^|;){}(\.|$|;|:)".format(filters["tag_name"]))

    # href/text values are regex-escaped; both must appear, href first.
    attributes: Dict[str, str] = {
        key: re.escape(filters[key]) for key in ("href", "text") if filters.get(key)
    }
    if attributes:
        pairs = ['{}="{}"'.format(key, value) for key, value in attributes.items()]
        add_condition("attributes_regex", ".*?({}).*?".format(".*?".join(pairs)))

    return (conditions, params)
def test_unique_order(self):
    """Two identical ``div`` parts are expected to get unique_order 0 and 1."""
    parsed = Selector("div > div")
    expected_parts = [
        {"data": {"tag_name": "div"}, "direct_descendant": False, "unique_order": 0},
        {"data": {"tag_name": "div"}, "direct_descendant": True, "unique_order": 1},
    ]
    for position, expected in enumerate(expected_parts):
        self.assertEqual(parsed.parts[position].__dict__, expected)
def test_selector_id(self):
    """An ``[id="5"]`` parent is expected to be stored under ``attr_id``."""
    parsed = Selector('[id="5"] > span')
    expected_parts = [
        {"data": {"tag_name": "span"}, "direct_descendant": False, "unique_order": 0},
        {"data": {"attr_id": "5"}, "direct_descendant": True, "unique_order": 0},
    ]
    for position, expected in enumerate(expected_parts):
        self.assertEqual(parsed.parts[position].__dict__, expected)
def test_selector_child(self):
    """A space-separated selector expects ``span`` first and neither part direct."""
    parsed = Selector("div span")
    expected_parts = [
        {"data": {"tag_name": "span"}, "direct_descendant": False, "unique_order": 0},
        {"data": {"tag_name": "div"}, "direct_descendant": False, "unique_order": 0},
    ]
    for position, expected in enumerate(expected_parts):
        self.assertEqual(parsed.parts[position].__dict__, expected)
def test_nth_child(self):
    """``:nth-child(3)`` is expected to land in the part's data as ``nth_child``."""
    parsed = Selector("div > span:nth-child(3)")
    expected_parts = [
        {"data": {"tag_name": "span", "nth_child": "3"}, "direct_descendant": False, "unique_order": 0},
        {"data": {"tag_name": "div"}, "direct_descendant": True, "unique_order": 0},
    ]
    for position, expected in enumerate(expected_parts):
        self.assertEqual(parsed.parts[position].__dict__, expected)
def test_nth_child(self):
    """``:nth-child(3)`` is expected to land in the part's data as ``nth_child``."""
    parsed = Selector("div > span:nth-child(3)")
    # (data, direct_descendant, unique_order) for each part, in index order.
    expected_parts = [
        ({"tag_name": "span", "nth_child": "3"}, False, 0),
        ({"tag_name": "div"}, True, 0),
    ]
    for position, (data, direct_descendant, unique_order) in enumerate(expected_parts):
        self.assertEqual(parsed.parts[position].data, data)
        self.assertEqual(parsed.parts[position].direct_descendant, direct_descendant)
        self.assertEqual(parsed.parts[position].unique_order, unique_order)
def test_selector_child(self):
    """A space-separated selector expects ``span`` first and neither part direct."""
    parsed = Selector("div span")
    # (data, direct_descendant, unique_order) for each part, in index order.
    expected_parts = [
        ({"tag_name": "span"}, False, 0),
        ({"tag_name": "div"}, False, 0),
    ]
    for position, (data, direct_descendant, unique_order) in enumerate(expected_parts):
        self.assertEqual(parsed.parts[position].data, data)
        self.assertEqual(parsed.parts[position].direct_descendant, direct_descendant)
        self.assertEqual(parsed.parts[position].unique_order, unique_order)
def test_unique_order(self):
    """Two identical ``div`` parts are expected to get unique_order 0 and 1."""
    parsed = Selector("div > div")
    # (data, direct_descendant, unique_order) for each part, in index order.
    expected_parts = [
        ({"tag_name": "div"}, False, 0),
        ({"tag_name": "div"}, True, 1),
    ]
    for position, (data, direct_descendant, unique_order) in enumerate(expected_parts):
        self.assertEqual(parsed.parts[position].data, data)
        self.assertEqual(parsed.parts[position].direct_descendant, direct_descendant)
        self.assertEqual(parsed.parts[position].unique_order, unique_order)
def test_asterisk_in_middle_of_query(self):
    """With ``*`` between two divs, the second part is expected not to be direct."""
    parsed = Selector("div > * > div")
    # (data, direct_descendant, unique_order) for each part, in index order.
    expected_parts = [
        ({"tag_name": "div"}, False, 0),
        ({"tag_name": "div"}, False, 1),
    ]
    for position, (data, direct_descendant, unique_order) in enumerate(expected_parts):
        self.assertEqual(parsed.parts[position].data, data)
        self.assertEqual(parsed.parts[position].direct_descendant, direct_descendant)
        self.assertEqual(parsed.parts[position].unique_order, unique_order)
def test_selector_attribute(self):
    """A ``data-id`` attribute is expected under ``attributes__attr__data-id``."""
    parsed = Selector('div[data-id="5"] > span')
    # (data, direct_descendant, unique_order) for each part, in index order.
    expected_parts = [
        ({"tag_name": "span"}, False, 0),
        ({"tag_name": "div", "attributes__attr__data-id": "5"}, True, 0),
    ]
    for position, (data, direct_descendant, unique_order) in enumerate(expected_parts):
        self.assertEqual(parsed.parts[position].data, data)
        self.assertEqual(parsed.parts[position].direct_descendant, direct_descendant)
        self.assertEqual(parsed.parts[position].unique_order, unique_order)
def test_selector_id(self):
    """An ``[id="5"]`` parent is expected to be stored under ``attr_id``."""
    parsed = Selector('[id="5"] > span')
    # (data, direct_descendant, unique_order) for each part, in index order.
    expected_parts = [
        ({"tag_name": "span"}, False, 0),
        ({"attr_id": "5"}, True, 0),
    ]
    for position, (data, direct_descendant, unique_order) in enumerate(expected_parts):
        self.assertEqual(parsed.parts[position].data, data)
        self.assertEqual(parsed.parts[position].direct_descendant, direct_descendant)
        self.assertEqual(parsed.parts[position].unique_order, unique_order)
def test_class(self):
    """Chained classes are expected to be collected in ``attr_class__contains``."""
    parsed = Selector("div.classone.classtwo > span")
    # (data, direct_descendant, unique_order) for each part, in index order.
    expected_parts = [
        ({"tag_name": "span"}, False, 0),
        ({"tag_name": "div", "attr_class__contains": ["classone", "classtwo"]}, True, 0),
    ]
    for position, (data, direct_descendant, unique_order) in enumerate(expected_parts):
        self.assertEqual(parsed.parts[position].data, data)
        self.assertEqual(parsed.parts[position].direct_descendant, direct_descendant)
        self.assertEqual(parsed.parts[position].unique_order, unique_order)
def test_class(self):
    """Chained classes are expected to be collected in ``attr_class__contains``."""
    parsed = Selector("div.classone.classtwo > span")
    expected_parts = [
        {"data": {"tag_name": "span"}, "direct_descendant": False, "unique_order": 0},
        {
            "data": {"tag_name": "div", "attr_class__contains": ["classone", "classtwo"]},
            "direct_descendant": True,
            "unique_order": 0,
        },
    ]
    for position, expected in enumerate(expected_parts):
        self.assertEqual(parsed.parts[position].__dict__, expected)
def test_selector_id(self):
    """An ``[id="5"]`` parent is expected to be stored under ``attr_id``."""
    parsed = Selector('[id="5"] > span')
    expected_parts = [
        {'data': {'tag_name': 'span'}, 'direct_descendant': False},
        {'data': {'attr_id': '5'}, 'direct_descendant': True},
    ]
    for position, expected in enumerate(expected_parts):
        self.assertEqual(parsed.parts[position].__dict__, expected)
def test_selector_child_direct_descendant(self):
    """With ``>``, the ``div`` part is expected to be flagged as a direct descendant."""
    parsed = Selector("div > span")
    expected_parts = [
        {'data': {'tag_name': 'span'}, 'direct_descendant': False},
        {'data': {'tag_name': 'div'}, 'direct_descendant': True},
    ]
    for position, expected in enumerate(expected_parts):
        self.assertEqual(parsed.parts[position].__dict__, expected)
def filter_element(filters: Dict, prepend: str = "") -> Tuple[List[str], Dict]:
    """Translate element filters into ``match(elements_chain, ...)`` conditions.

    ``selector``, ``tag_name``, ``href`` and ``text`` may each be a single
    value or a list; alternatives within one group are OR-ed together. The
    ``href`` and ``text`` alternatives share a single OR group.

    Args:
        filters: Mapping of filter names to values; falsy entries are ignored.
        prepend: Prefix added to every generated parameter name.

    Returns:
        ``(conditions, params)``: the parenthesised OR groups and the regex
        parameters they reference.
    """
    params = {}
    conditions = []

    def as_list(value):
        return value if isinstance(value, list) else [value]

    def add_group(alternatives):
        # An empty group contributes no condition at all.
        if alternatives:
            conditions.append("(" + " OR ".join(alternatives) + ")")

    if filters.get("selector"):
        selector_matches = []
        for position, query in enumerate(as_list(filters["selector"])):
            parsed = Selector(query, escape_slashes=False)
            name = "{}_{}_selector_regex".format(prepend, position)
            params[name] = _create_regex(parsed)
            selector_matches.append("match(elements_chain, %({})s)".format(name))
        add_group(selector_matches)

    if filters.get("tag_name"):
        tag_matches = []
        for position, tag_name in enumerate(as_list(filters["tag_name"])):
            name = "{}_{}_tag_name_regex".format(prepend, position)
            params[name] = r"(^|;){}(\.|$|;|:)".format(tag_name)
            tag_matches.append("match(elements_chain, %({})s)".format(name))
        add_group(tag_matches)

    # Regex-escape every href/text value; a bare string counts as one value.
    attributes: Dict[str, List] = {}
    for key in ("href", "text"):
        raw = filters.get(key)
        if not raw:
            continue
        if isinstance(raw, str):
            attributes[key] = [re.escape(raw)]
        else:
            attributes[key] = [re.escape(item) for item in raw]

    if attributes:
        attribute_matches = []
        for key, escaped_values in attributes.items():
            for position, escaped in enumerate(escaped_values):
                name = "{}_{}_{}_attributes_regex".format(prepend, key, position)
                params[name] = ".*?({}).*?".format('{}="{}"'.format(key, escaped))
                attribute_matches.append("match(elements_chain, %({})s)".format(name))
        add_group(attribute_matches)

    return (conditions, params)
def test_class(self):
    """Chained classes are expected to be collected in ``attr_class__contains``."""
    parsed = Selector('div.classone.classtwo > span')
    expected_parts = [
        {'data': {'tag_name': 'span'}, 'direct_descendant': False},
        {
            'data': {'tag_name': 'div', 'attr_class__contains': ['classone', 'classtwo']},
            'direct_descendant': True,
        },
    ]
    for position, expected in enumerate(expected_parts):
        self.assertEqual(parsed.parts[position].__dict__, expected)
def test_nth_child(self):
    """``:nth-child(3)`` is expected to land in the part's data as ``nth_child``."""
    parsed = Selector('div > span:nth-child(3)')
    expected_parts = [
        {'data': {'tag_name': 'span', 'nth_child': '3'}, 'direct_descendant': False},
        {'data': {'tag_name': 'div'}, 'direct_descendant': True},
    ]
    for position, expected in enumerate(expected_parts):
        self.assertEqual(parsed.parts[position].__dict__, expected)
def test_unique_order(self):
    """Two identical ``div`` parts are expected to get unique_order 0 and 1."""
    parsed = Selector('div > div')
    expected_parts = [
        {'data': {'tag_name': 'div'}, 'direct_descendant': False, 'unique_order': 0},
        {'data': {'tag_name': 'div'}, 'direct_descendant': True, 'unique_order': 1},
    ]
    for position, expected in enumerate(expected_parts):
        self.assertEqual(parsed.parts[position].__dict__, expected)
def test_selector_child(self):
    """A space-separated selector expects ``span`` first and neither part direct."""
    parsed = Selector("div span")
    expected_parts = [
        {'data': {'tag_name': 'span'}, 'direct_descendant': False, 'unique_order': 0},
        {'data': {'tag_name': 'div'}, 'direct_descendant': False, 'unique_order': 0},
    ]
    for position, expected in enumerate(expected_parts):
        self.assertEqual(parsed.parts[position].__dict__, expected)
def filter_element(step: ActionStep, prepend: str = "", index=0) -> Tuple[str, Dict, int]:
    """Build the regex-based element filter clause for a single action step.

    Args:
        step: The action step whose ``selector``, ``tag_name``, ``href`` and
            ``text`` fields (read via ``model_to_dict``) drive the filter.
        prepend: Prefix added to every generated parameter name.
        index: Running index; passed to ``filter_event`` when the step has a
            URL, otherwise incremented locally.

    Returns:
        ``(clause, params, index)`` where ``clause`` is
        ``ELEMENT_ACTION_FILTER`` with its placeholders filled in, ``params``
        holds the regexes to bind, and ``index`` is advanced once more.
    """
    if step.url:
        event_filter, params, index = filter_event(step, prepend, index)
    else:
        event_filter, params, index = "", {}, index + 1

    filters = model_to_dict(step)

    def match_clause(name, enabled):
        # Emit the AND-ed match() clause only when the regex was registered.
        return "AND match(elements_chain, %({})s)".format(name) if enabled else ""

    selector_name = "{}selector_regex".format(prepend)
    tag_name_name = "{}tag_name_regex".format(prepend)
    attributes_name = "{}attributes_regex".format(prepend)

    if filters.get("selector"):
        parsed = Selector(filters["selector"], escape_slashes=False)
        params[selector_name] = _create_regex(parsed)

    if filters.get("tag_name"):
        params[tag_name_name] = r"(^|;){}(\.|$|;|:)".format(filters["tag_name"])

    # href/text values are regex-escaped; both must appear, href first.
    attributes: Dict[str, str] = {
        key: re.escape(filters[key]) for key in ("href", "text") if filters.get(key)
    }
    has_attributes = bool(attributes)
    if has_attributes:
        pairs = ['{}="{}"'.format(key, value) for key, value in attributes.items()]
        params[attributes_name] = ".*?({}).*?".format(".*?".join(pairs))

    clause = ELEMENT_ACTION_FILTER.format(
        selector_regex=match_clause(selector_name, filters.get("selector")),
        attributes_regex=match_clause(attributes_name, has_attributes),
        tag_name_regex=match_clause(tag_name_name, filters.get("tag_name")),
        event_filter="AND uuid IN {}".format(event_filter) if event_filter else "",
    )
    return (clause, params, index + 1)
def test_selector_attribute(self):
    """A ``data-id`` attribute is expected under ``attributes__data-id``."""
    parsed = Selector('div[data-id="5"] > span')
    expected_parts = [
        {'data': {'tag_name': 'span'}, 'direct_descendant': False, 'unique_order': 0},
        {
            'data': {'tag_name': 'div', 'attributes__data-id': '5'},
            'direct_descendant': True,
            'unique_order': 0,
        },
    ]
    for position, expected in enumerate(expected_parts):
        self.assertEqual(parsed.parts[position].__dict__, expected)
def test_selector_with_spaces(self):
    """Trailing whitespace in the query is expected not to affect the parsed part."""
    parsed = Selector("span ")
    expectations = (
        ("data", {"tag_name": "span"}),
        ("direct_descendant", False),
        ("unique_order", 0),
    )
    for attribute, expected in expectations:
        self.assertEqual(getattr(parsed.parts[0], attribute), expected)
def filter_element(filters: Dict, *, operator: Optional[OperatorType] = None, prepend: str = "") -> Tuple[str, Dict]:
    """Translate element filters into a single SQL condition over ``elements_chain``.

    Args:
        filters: Mapping that may contain ``selector``, ``tag_name``, ``href``
            and ``text``; each may be a single value or a list. Entries that
            are ``None`` are ignored.
        operator: Comparison operator; a falsy value is treated as ``"exact"``.
        prepend: Prefix added to every generated parameter name.

    Returns:
        ``(condition, params)``. The condition is the AND of one OR group per
        filter, wrapped in ``NOT`` when the operator is in
        ``NEGATED_OPERATORS``. An always-false ``"0 = 19x"`` condition is
        returned for an empty value list under a non-negated operator, and
        ``("", {})`` when nothing produced a condition.

    Raises:
        exceptions.ValidationError: If ``selector`` or ``tag_name`` is given
            with an operator other than ``"exact"`` or ``"is_not"``.
    """
    operator = operator or "exact"
    negated = operator in NEGATED_OPERATORS

    params = {}
    final_conditions = []

    def or_group(alternatives):
        return "(" + " OR ".join(alternatives) + ")"

    def as_list(value):
        return value if isinstance(value, list) else [value]

    if filters.get("selector") is not None:
        if operator not in ("exact", "is_not"):
            raise exceptions.ValidationError(
                'Filtering by element selector only supports operators "equals" and "doesn\'t equal" currently.'
            )
        selectors = as_list(filters["selector"])
        if selectors:
            selector_matches = []
            for idx, query in enumerate(selectors):
                if query:  # Empty selectors are skipped
                    key = f"{prepend}_{idx}_selector_regex"
                    params[key] = build_selector_regex(Selector(query, escape_slashes=False))
                    selector_matches.append(f"match(elements_chain, %({key})s)")
            if selector_matches:
                final_conditions.append(or_group(selector_matches))
        elif not negated:
            # If a non-negated filter has an empty selector list provided, it can't match anything
            return "0 = 191", {}

    if filters.get("tag_name") is not None:
        if operator not in ("exact", "is_not"):
            raise exceptions.ValidationError(
                'Filtering by element tag only supports operators "equals" and "doesn\'t equal" currently.'
            )
        tag_names = as_list(filters["tag_name"])
        if tag_names:
            tag_matches = []
            for idx, tag_name in enumerate(tag_names):
                key = f"{prepend}_{idx}_tag_name_regex"
                params[key] = rf"(^|;){tag_name}(\.|$|;|:)"
                tag_matches.append(f"match(elements_chain, %({key})s)")
            final_conditions.append(or_group(tag_matches))
        elif not negated:
            # If a non-negated filter has an empty tag_name list provided, it can't match anything
            return "0 = 192", {}

    # Process both href and text up front, before any of them can trigger an early return.
    attributes: Dict[str, List] = {
        key: process_ok_values(filters[key], operator)
        for key in ("href", "text")
        if filters.get(key) is not None
    }

    for key, ok_values in attributes.items():
        if ok_values:
            optional_flag = "(?i)" if operator.endswith("icontains") else ""
            attribute_matches = []
            for idx, value in enumerate(ok_values):
                param_name = f"{prepend}_{key}_{idx}_attributes_regex"
                params[param_name] = f'{optional_flag}({key}="{value}")'
                attribute_matches.append(f"match(elements_chain, %({param_name})s)")
            final_conditions.append(or_group(attribute_matches))
        elif not negated:
            # If a non-negated filter has an empty href or text list provided, it can't match anything
            return "0 = 193", {}

    if not final_conditions:
        return "", {}

    negation = "NOT " if negated else ""
    return negation + "(" + " AND ".join(final_conditions) + ")", params
def test_selector_with_spaces(self):
    """Trailing whitespace in the query is expected not to affect the parsed part."""
    parsed = Selector("span ")
    expected_first_part = {"data": {"tag_name": "span"}, "direct_descendant": False, "unique_order": 0}
    self.assertEqual(parsed.parts[0].__dict__, expected_first_part)