def test_empty_from_json(self):
    """Element JSON with neither rules nor children must raise ValueError."""
    # Both "rules" and "children" are empty lists, so the element is empty.
    ruleless_json = {
        "selector": "div",
        "children": [],
        "rules": [],
        "spec": {"type": "single", "index": 0},
    }
    with self.assertRaises(ValueError):
        ElementFactory.from_json(ruleless_json)
def test_all_child_single_optional_fail(self):
    """
    ALL_ELEMENT with an optional SINGLE_ELEMENT child: an item that omits
    "count", and an entirely empty output, must both fail ``compare``.
    """
    optional_child = copy.deepcopy(SINGLE_ELEMENT)
    optional_child["optional"] = True
    parent_json = copy.deepcopy(ALL_ELEMENT)
    parent_json["children"].append(optional_child)
    schema = flatten_element(ElementFactory.from_json(parent_json))

    missing_count = {
        "items": [
            {
                "title": "Foundation",
                "url": "http://www.isaacasimov.com/foundation",
            }
        ]
    }
    for rejected in (missing_count, {}):
        self.assertFalse(compare(rejected, schema))
def test_range_element(self):
    """Items that each carry an integer "count" show no differences for RANGE_ELEMENT."""
    element = ElementFactory.from_json(copy.deepcopy(RANGE_ELEMENT))
    schema = flatten_element(element)
    valid_output = {
        "items": [
            {"count": 7},
            {"count": 12},
        ]
    }
    self.assertIsNone(differences(valid_output, schema))
def test_all_optional(self):
    """
    With ALL_ELEMENT marked optional, the flattened "items" entry and its
    "count" rule are both flagged optional.
    """
    element_json = copy.deepcopy(ALL_ELEMENT)
    element_json["optional"] = True
    schema = flatten_element(ElementFactory.from_json(element_json))

    # the "items" entry is a (dict, optional) pair
    items, items_optional = schema.get("items")
    self.assertIsInstance(items, dict)
    self.assertEqual(items_optional, True)

    for name, expected_type, expected_optional in (("count", int, True),):
        rule = items.get(name)
        self.assertIsNotNone(rule)
        _type, optional = rule
        self.assertIs(_type, expected_type)
        self.assertEqual(optional, expected_optional)
def test_all_child_single_optional(self):
    """
    An optional SINGLE_ELEMENT child of ALL_ELEMENT contributes optional
    "title"/"url" rules, while "items" and "count" stay required.
    """
    child_json = copy.deepcopy(SINGLE_ELEMENT)
    child_json["optional"] = True
    parent_json = copy.deepcopy(ALL_ELEMENT)
    parent_json["children"].append(child_json)
    schema = flatten_element(ElementFactory.from_json(parent_json))

    # the "items" entry is a (dict, optional) pair
    items, items_optional = schema.get("items")
    self.assertIsInstance(items, dict)
    self.assertEqual(items_optional, False)

    expectations = (
        ("count", int, False),
        ("title", str, True),
        ("url", str, True),
    )
    for name, expected_type, expected_optional in expectations:
        rule = items.get(name)
        self.assertIsNotNone(rule)
        _type, optional = rule
        self.assertIs(_type, expected_type)
        self.assertEqual(optional, expected_optional)
def test_children_data(self):
    """Data built from SINGLE_JSON contains both the "link" and "headline" keys."""
    element = ElementFactory.from_json(SINGLE_JSON)
    root = html.fragment_fromstring(
        "<body><div><a href=\"#\">Test</a></div></body>")
    extracted = element.data(root)
    for key in ("link", "headline"):
        self.assertIn(key, extracted)
def test_all_child_single(self):
    """
    With SINGLE_ELEMENT nested inside ALL_ELEMENT, each item is expected
    to carry the child's "title" and "url" next to the parent's "count".
    """
    child_json = copy.deepcopy(SINGLE_ELEMENT)
    parent_json = copy.deepcopy(ALL_ELEMENT)
    parent_json["children"].append(child_json)
    schema = flatten_element(ElementFactory.from_json(parent_json))

    nightfall = {
        "count": 7,
        "title": "Nightfall",
        "url": "http://www.isaacasimov.com/nightfall",
    }
    foundation = {
        "count": 12,
        "title": "Foundation",
        "url": "http://www.isaacasimov.com/foundation",
    }
    self.assertTrue(compare({"items": [nightfall, foundation]}, schema))
def test_range_optional(self):
    """
    With RANGE_ELEMENT marked optional, the flattened "items" entry and
    its "count" rule are both flagged optional.
    """
    element_json = copy.deepcopy(RANGE_ELEMENT)
    element_json["optional"] = True
    schema = flatten_element(ElementFactory.from_json(element_json))

    # the "items" entry is a (dict, optional) pair
    items, items_optional = schema.get("items")
    self.assertIsInstance(items, dict)
    self.assertEqual(items_optional, True)

    for name, expected_type, expected_optional in (("count", int, True),):
        rule = items.get(name)
        self.assertIsNotNone(rule)
        _type, optional = rule
        self.assertIs(_type, expected_type)
        self.assertEqual(optional, expected_optional)
def test_all_child_all_optional(self):
    """
    With an optional ALT_ALL_ELEMENT child, "paragraphs" may be filled,
    empty, or missing without producing any differences.
    """
    child_json = copy.deepcopy(ALT_ALL_ELEMENT)
    child_json["optional"] = True
    parent_json = copy.deepcopy(ALL_ELEMENT)
    parent_json["children"].append(child_json)
    schema = flatten_element(ElementFactory.from_json(parent_json))

    with_paragraphs = {
        "items": [{
            "count": 6,
            "paragraphs": [
                {"description": "foo"},
                {"description": "bar"},
            ],
        }]
    }
    empty_paragraphs = {"items": [{"count": 6, "paragraphs": []}]}
    no_paragraphs = {"items": [{"count": 6}]}
    for accepted in (with_paragraphs, empty_paragraphs, no_paragraphs):
        self.assertIsNone(differences(accepted, schema))
def test_single_child_all(self):
    """
    With ALL_ELEMENT nested inside SINGLE_ELEMENT, the output keeps the
    parent's "title"/"url" at the top level and the child's items under
    the "items" key.
    """
    child_json = copy.deepcopy(ALL_ELEMENT)
    parent_json = copy.deepcopy(SINGLE_ELEMENT)
    parent_json["children"].append(child_json)
    schema = flatten_element(ElementFactory.from_json(parent_json))

    valid_output = {
        "title": "Nightfall",
        "url": "http://www.isaacasimov.com/nightfall",
        "items": [{"count": 7}, {"count": 12}],
    }
    self.assertTrue(compare(valid_output, schema))
def test_single_from_json(self):
    """SINGLE_JSON builds a SingleElement whose first child is also a SingleElement."""
    element = ElementFactory.from_json(SINGLE_JSON)
    self.assertIsNotNone(element)
    self.assertIsInstance(element, SingleElement)
    # the nested child must be built as an element of the same kind
    self.assertIsInstance(element.children[0], SingleElement)
def test_single_child_all_optional_fail(self):
    """
    An optional ALL_ELEMENT child does not excuse the parent's own keys:
    outputs missing "title" or "url" must fail ``compare``.
    """
    child_json = copy.deepcopy(ALL_ELEMENT)
    child_json["optional"] = True
    parent_json = copy.deepcopy(SINGLE_ELEMENT)
    parent_json["children"].append(child_json)
    schema = flatten_element(ElementFactory.from_json(parent_json))

    missing_title = {
        "url": "http://www.isaacasimov.com/nightfall",
        "items": [{"count": 7}, {"count": 12}],
    }
    missing_url = {
        "title": "Nightfall",
        "items": [{"count": 7}, {"count": 12}],
    }
    for rejected in (missing_title, missing_url):
        self.assertFalse(compare(rejected, schema))
def test_all_child_single_optional_fail(self):
    """
    ALL_ELEMENT with an optional SINGLE_ELEMENT child: an item that omits
    "count", and an entirely empty output, must both report differences.
    """
    optional_child = copy.deepcopy(SINGLE_ELEMENT)
    optional_child["optional"] = True
    parent_json = copy.deepcopy(ALL_ELEMENT)
    parent_json["children"].append(optional_child)
    schema = flatten_element(ElementFactory.from_json(parent_json))

    missing_count = {
        "items": [
            {
                "title": "Foundation",
                "url": "http://www.isaacasimov.com/foundation",
            }
        ]
    }
    for rejected in (missing_count, {}):
        self.assertIsNotNone(differences(rejected, schema))
def test_all_child_single(self):
    """
    With SINGLE_ELEMENT nested inside ALL_ELEMENT, items carrying "count",
    "title" and "url" together produce no differences.
    """
    child_json = copy.deepcopy(SINGLE_ELEMENT)
    parent_json = copy.deepcopy(ALL_ELEMENT)
    parent_json["children"].append(child_json)
    schema = flatten_element(ElementFactory.from_json(parent_json))

    nightfall = {
        "count": 7,
        "title": "Nightfall",
        "url": "http://www.isaacasimov.com/nightfall",
    }
    foundation = {
        "count": 12,
        "title": "Foundation",
        "url": "http://www.isaacasimov.com/foundation",
    }
    self.assertIsNone(differences({"items": [nightfall, foundation]}, schema))
def test_single_child_all_optional_fail(self):
    """
    An optional ALL_ELEMENT child does not excuse the parent's own keys:
    outputs missing "title" or "url" must report differences.
    """
    child_json = copy.deepcopy(ALL_ELEMENT)
    child_json["optional"] = True
    parent_json = copy.deepcopy(SINGLE_ELEMENT)
    parent_json["children"].append(child_json)
    schema = flatten_element(ElementFactory.from_json(parent_json))

    missing_title = {
        "url": "http://www.isaacasimov.com/nightfall",
        "items": [{"count": 7}, {"count": 12}],
    }
    missing_url = {
        "title": "Nightfall",
        "items": [{"count": 7}, {"count": 12}],
    }
    for rejected in (missing_title, missing_url):
        self.assertIsNotNone(differences(rejected, schema))
def test_single_child_all(self):
    """
    With ALL_ELEMENT nested inside SINGLE_ELEMENT, an output holding the
    parent's "title"/"url" plus the child's list under "items" produces
    no differences.
    """
    child_json = copy.deepcopy(ALL_ELEMENT)
    parent_json = copy.deepcopy(SINGLE_ELEMENT)
    parent_json["children"].append(child_json)
    schema = flatten_element(ElementFactory.from_json(parent_json))

    valid_output = {
        "title": "Nightfall",
        "url": "http://www.isaacasimov.com/nightfall",
        "items": [{"count": 7}, {"count": 12}],
    }
    self.assertIsNone(differences(valid_output, schema))
def test_range_optional(self):
    """
    Flattening an optional RANGE_ELEMENT yields an optional "items" dict
    whose "count" rule is an optional int.
    """
    element_json = copy.deepcopy(RANGE_ELEMENT)
    element_json["optional"] = True
    element = ElementFactory.from_json(element_json)
    schema = flatten_element(element)

    # unpack the (rules dict, optional flag) pair stored under "items"
    item_rules, item_rules_optional = schema.get("items")
    self.assertIsInstance(item_rules, dict)
    self.assertEqual(item_rules_optional, True)

    expectations = [("count", int, True)]
    for name, expected_type, expected_optional in expectations:
        rule = item_rules.get(name)
        self.assertIsNotNone(rule)
        _type, optional = rule
        self.assertIs(_type, expected_type)
        self.assertEqual(optional, expected_optional)
def test_all_child_single_fail(self):
    """
    With SINGLE_ELEMENT nested inside ALL_ELEMENT, items with a string
    "count", a missing "count", or a missing "title" must report
    differences, as must an empty output.
    """
    child_json = copy.deepcopy(SINGLE_ELEMENT)
    parent_json = copy.deepcopy(ALL_ELEMENT)
    parent_json["children"].append(child_json)
    schema = flatten_element(ElementFactory.from_json(parent_json))

    wrong_count_type = {
        "items": [{
            "count": "12",
            "title": "Foundation",
            "url": "http://www.isaacasimov.com/foundation",
        }]
    }
    missing_count = {
        "items": [{
            "title": "Foundation",
            "url": "http://www.isaacasimov.com/foundation",
        }]
    }
    missing_title = {
        "items": [{
            "count": 12,
            "url": "http://www.isaacasimov.com/foundation",
        }]
    }
    for rejected in (wrong_count_type, missing_count, missing_title, {}):
        self.assertIsNotNone(differences(rejected, schema))
def test_all_optional(self):
    """
    Flattening an optional ALL_ELEMENT yields an optional "items" dict
    whose "count" rule is an optional int.
    """
    element_json = copy.deepcopy(ALL_ELEMENT)
    element_json["optional"] = True
    element = ElementFactory.from_json(element_json)
    schema = flatten_element(element)

    # unpack the (rules dict, optional flag) pair stored under "items"
    item_rules, item_rules_optional = schema.get("items")
    self.assertIsInstance(item_rules, dict)
    self.assertEqual(item_rules_optional, True)

    expectations = [("count", int, True)]
    for name, expected_type, expected_optional in expectations:
        rule = item_rules.get(name)
        self.assertIsNotNone(rule)
        _type, optional = rule
        self.assertIs(_type, expected_type)
        self.assertEqual(optional, expected_optional)
def test_all_child_all_optional_fail(self):
    """
    An optional ALT_ALL_ELEMENT child does not make the parent's "count"
    optional: an item without it, and an empty output, report differences.
    """
    child_json = copy.deepcopy(ALT_ALL_ELEMENT)
    child_json["optional"] = True
    parent_json = copy.deepcopy(ALL_ELEMENT)
    parent_json["children"].append(child_json)
    schema = flatten_element(ElementFactory.from_json(parent_json))

    missing_count = {
        "items": [
            {
                "paragraphs": [
                    {"description": "foo"},
                    {"description": "bar"},
                ]
            }
        ]
    }
    for rejected in (missing_count, {}):
        self.assertIsNotNone(differences(rejected, schema))
def test_all_child_single_optional(self):
    """
    Flattening ALL_ELEMENT with an optional SINGLE_ELEMENT child keeps
    "items" and "count" required and marks "title" and "url" optional.
    """
    child_json = copy.deepcopy(SINGLE_ELEMENT)
    child_json["optional"] = True
    parent_json = copy.deepcopy(ALL_ELEMENT)
    parent_json["children"].append(child_json)
    element = ElementFactory.from_json(parent_json)
    schema = flatten_element(element)

    # unpack the (rules dict, optional flag) pair stored under "items"
    item_rules, item_rules_optional = schema.get("items")
    self.assertIsInstance(item_rules, dict)
    self.assertEqual(item_rules_optional, False)

    expectations = [
        ("count", int, False),
        ("title", str, True),
        ("url", str, True),
    ]
    for name, expected_type, expected_optional in expectations:
        rule = item_rules.get(name)
        self.assertIsNotNone(rule)
        _type, optional = rule
        self.assertIs(_type, expected_type)
        self.assertEqual(optional, expected_optional)
def test_all_element_fail(self):
    """An item lacking "count" makes the output differ from ALL_ELEMENT's schema."""
    element = ElementFactory.from_json(copy.deepcopy(ALL_ELEMENT))
    schema = flatten_element(element)
    # the second item is empty, so it has no "count"
    invalid_output = {
        "items": [
            {"count": 7},
            {},
        ]
    }
    self.assertIsNotNone(differences(invalid_output, schema))
def test_all_child_all_optional_fail(self):
    """
    An optional ALT_ALL_ELEMENT child does not make the parent's "count"
    optional: an item without it, and an empty output, fail ``compare``.
    """
    child_json = copy.deepcopy(ALT_ALL_ELEMENT)
    child_json["optional"] = True
    parent_json = copy.deepcopy(ALL_ELEMENT)
    parent_json["children"].append(child_json)
    schema = flatten_element(ElementFactory.from_json(parent_json))

    missing_count = {
        "items": [
            {
                "paragraphs": [
                    {"description": "foo"},
                    {"description": "bar"},
                ]
            }
        ]
    }
    for rejected in (missing_count, {}):
        self.assertFalse(compare(rejected, schema))
def test_single_element(self):
    """An output with string "title" and "url" matches SINGLE_ELEMENT's schema."""
    element = ElementFactory.from_json(copy.deepcopy(SINGLE_ELEMENT))
    schema = flatten_element(element)
    valid_output = {
        "title": "Nightfall",
        "url": "http://www.isaacasimov.com/nightfall",
    }
    self.assertTrue(compare(valid_output, schema))
def test_single_element(self):
    """An output with string "title" and "url" has no differences from SINGLE_ELEMENT's schema."""
    element = ElementFactory.from_json(copy.deepcopy(SINGLE_ELEMENT))
    schema = flatten_element(element)
    valid_output = {
        "title": "Nightfall",
        "url": "http://www.isaacasimov.com/nightfall",
    }
    self.assertIsNone(differences(valid_output, schema))
def test_all_element(self):
    """Items that each carry an integer "count" show no differences for ALL_ELEMENT."""
    element = ElementFactory.from_json(copy.deepcopy(ALL_ELEMENT))
    schema = flatten_element(element)
    valid_output = {"items": [{"count": 7}, {"count": 12}]}
    self.assertIsNone(differences(valid_output, schema))
def test_range_element_fail(self):
    """An item lacking "count" makes the output differ from RANGE_ELEMENT's schema."""
    element = ElementFactory.from_json(copy.deepcopy(RANGE_ELEMENT))
    schema = flatten_element(element)
    # the second item is empty, so it has no "count"
    invalid_output = {"items": [{"count": 7}, {}]}
    self.assertIsNotNone(differences(invalid_output, schema))
def test_single_child_single_optional_fail(self):
    """
    An optional ALT_SINGLE_ELEMENT child does not excuse the parent's
    "url": an output without it must report differences.
    """
    child_json = copy.deepcopy(ALT_SINGLE_ELEMENT)
    child_json["optional"] = True
    parent_json = copy.deepcopy(SINGLE_ELEMENT)
    parent_json["children"].append(child_json)
    schema = flatten_element(ElementFactory.from_json(parent_json))

    missing_url = {"title": "The Dark Forest", "text": "The Second Novel"}
    for rejected in (missing_url,):
        self.assertIsNotNone(differences(rejected, schema))
def test_all_element(self):
    """Items that each carry an integer "count" match ALL_ELEMENT's schema."""
    element = ElementFactory.from_json(copy.deepcopy(ALL_ELEMENT))
    schema = flatten_element(element)
    valid_output = {"items": [{"count": 7}, {"count": 12}]}
    self.assertTrue(compare(valid_output, schema))
def test_range_element_fail(self):
    """An item lacking "count" makes the output fail ``compare`` for RANGE_ELEMENT."""
    element = ElementFactory.from_json(copy.deepcopy(RANGE_ELEMENT))
    schema = flatten_element(element)
    # the second item is empty, so it has no "count"
    invalid_output = {"items": [{"count": 7}, {}]}
    self.assertFalse(compare(invalid_output, schema))
def test_optional_range_element(self):
    """An empty output matches the schema of an optional RANGE_ELEMENT."""
    element_json = copy.deepcopy(RANGE_ELEMENT)
    element_json["optional"] = True
    schema = flatten_element(ElementFactory.from_json(element_json))
    # nothing at all was extracted
    self.assertTrue(compare({}, schema))
def test_optional_range_element(self):
    """An empty output has no differences from the schema of an optional RANGE_ELEMENT."""
    element_json = copy.deepcopy(RANGE_ELEMENT)
    element_json["optional"] = True
    schema = flatten_element(ElementFactory.from_json(element_json))
    # nothing at all was extracted
    self.assertIsNone(differences({}, schema))
def test_optional_all_element(self):
    """An empty output has no differences from the schema of an optional ALL_ELEMENT."""
    element_json = copy.deepcopy(ALL_ELEMENT)
    element_json["optional"] = True
    schema = flatten_element(ElementFactory.from_json(element_json))
    # nothing at all was extracted
    self.assertIsNone(differences({}, schema))
def test_single_child_single(self):
    """
    With ALT_SINGLE_ELEMENT nested inside SINGLE_ELEMENT, an output with
    "title", "url" and the child's "text" at the same level matches.
    """
    child_json = copy.deepcopy(ALT_SINGLE_ELEMENT)
    parent_json = copy.deepcopy(SINGLE_ELEMENT)
    parent_json["children"].append(child_json)
    schema = flatten_element(ElementFactory.from_json(parent_json))

    valid_output = {
        "title": "The Dark Forest",
        "url": "http://www.liucixin.com/the_dark_forest",
        "text": "The Second Novel",
    }
    self.assertTrue(compare(valid_output, schema))
def test_single_child_single(self):
    """
    With ALT_SINGLE_ELEMENT nested inside SINGLE_ELEMENT, an output with
    "title", "url" and the child's "text" at the same level produces no
    differences.
    """
    child_json = copy.deepcopy(ALT_SINGLE_ELEMENT)
    parent_json = copy.deepcopy(SINGLE_ELEMENT)
    parent_json["children"].append(child_json)
    schema = flatten_element(ElementFactory.from_json(parent_json))

    valid_output = {
        "title": "The Dark Forest",
        "url": "http://www.liucixin.com/the_dark_forest",
        "text": "The Second Novel",
    }
    self.assertIsNone(differences(valid_output, schema))
def test_single(self):
    """Flattening SINGLE_ELEMENT yields required str rules for "url" and "title"."""
    element = ElementFactory.from_json(copy.deepcopy(SINGLE_ELEMENT))
    schema = flatten_element(element)

    expectations = (
        ("url", str, False),
        ("title", str, False),
    )
    for name, expected_type, expected_optional in expectations:
        rule = schema.get(name)
        self.assertIsNotNone(rule)
        # each rule is a (type, optional) pair
        _type, optional = rule
        self.assertIs(_type, expected_type)
        self.assertEqual(optional, expected_optional)
def test_single_element_fail(self):
    """
    Outputs missing "url" or "title", holding an integer "url", or holding
    nothing at all must report differences against SINGLE_ELEMENT's schema.
    """
    element = ElementFactory.from_json(copy.deepcopy(SINGLE_ELEMENT))
    schema = flatten_element(element)

    missing_url = {"title": "Nightfall"}
    missing_title = {"url": "http://www.isaacasimov.com/nightfall"}
    wrong_url_type = {"title": "Nightfall", "url": 7}
    for rejected in (missing_url, missing_title, wrong_url_type, {}):
        self.assertIsNotNone(differences(rejected, schema))
def test_rule_no_attr(self):
    """
    Elements whose data comes back as None are dropped; for this markup
    only one entry is expected under "divs".
    """
    # the second <a> carries no href attribute
    markup = """ <section> <div><a href=\"#\">Test</a></div> <div><a>Test</a></div> <div><a href=\"#\">Test</a></div> <div><a href=\"#\">Test</a></div> </section> """
    element = ElementFactory.from_json(RANGE_JSON)
    root = html.fragment_fromstring(markup)
    extracted = element.data(root)
    self.assertEqual(len(extracted.get("divs")), 1)
def test_child_element_doesnt_exist(self):
    """
    A div with no child <a> contributes no data; three entries are
    expected under "divs" for this four-div markup.
    """
    # the second <div> is empty
    markup = """ <section> <div><a href=\"#foo\">Foo</a></div> <div></div> <div><a href=\"#baz\">Baz</a></div> <div><a href=\"#quux\">Quux</a></div> </section> """
    element = ElementFactory.from_json(ALL_JSON)
    root = html.fragment_fromstring(markup)
    extracted = element.data(root)
    self.assertEqual(len(extracted.get("divs")), 3)
def test_single_child_single_optional_fail(self):
    """
    With an optional ALT_SINGLE_ELEMENT child, an output that drops the
    parent's "url" must still report differences.
    """
    parent_json = copy.deepcopy(SINGLE_ELEMENT)
    child_json = copy.deepcopy(ALT_SINGLE_ELEMENT)
    child_json["optional"] = True
    parent_json["children"].append(child_json)
    element = ElementFactory.from_json(parent_json)
    schema = flatten_element(element)

    rejected_outputs = (
        {"title": "The Dark Forest", "text": "The Second Novel"},
    )
    for rejected in rejected_outputs:
        self.assertIsNotNone(differences(rejected, schema))
def test_single_child_single_fail(self):
    """
    With a non-optional ALT_SINGLE_ELEMENT child, outputs missing the
    child's "text" or the parent's "title" must report differences.
    """
    child_json = copy.deepcopy(ALT_SINGLE_ELEMENT)
    parent_json = copy.deepcopy(SINGLE_ELEMENT)
    parent_json["children"].append(child_json)
    schema = flatten_element(ElementFactory.from_json(parent_json))

    missing_text = {
        "title": "The Dark Forest",
        "url": "http://www.liucixin.com/the_dark_forest",
    }
    missing_title = {
        "url": "http://www.liucixin.com/the_dark_forest",
        "text": "The Second Novel",
    }
    for rejected in (missing_text, missing_title):
        self.assertIsNotNone(differences(rejected, schema))
def test_all_data_doesnt_exist(self):
    """
    When every matched element yields None, "divs" is expected to be an
    empty list rather than missing.
    """
    # the first two <div>s are empty
    markup = """ <section> <div></div> <div></div> <div><a href=\"#\">Test</a></div> <div><a href=\"#\">Test</a></div> </section> """
    element = ElementFactory.from_json(RANGE_JSON)
    root = html.fragment_fromstring(markup)
    found = element.data(root).get("divs")
    self.assertIsInstance(found, list)
    self.assertEqual(len(found), 0)
def test_all_data_none(self):
    """
    When every element yields None, "divs" is expected to be an empty
    list.
    """
    # none of the <a> tags carries an href attribute
    markup = """ <section> <div><a>Foo</a></div> <div><a>Bar</a></div> <div><a>Baz</a></div> <div><a>Quux</a></div> </section> """
    element = ElementFactory.from_json(ALL_JSON)
    root = html.fragment_fromstring(markup)
    found = element.data(root).get("divs")
    self.assertIsInstance(found, list)
    self.assertEqual(len(found), 0)
def test_all_optional_child_all(self):
    """
    With the parent ALL_ELEMENT marked optional, both "items" and the
    nested "paragraphs" dict, along with their rules, are flagged
    optional.
    """
    child_json = copy.deepcopy(ALT_ALL_ELEMENT)
    parent_json = copy.deepcopy(ALL_ELEMENT)
    parent_json["children"].append(child_json)
    parent_json["optional"] = True
    schema = flatten_element(ElementFactory.from_json(parent_json))

    def check_rules(rules, expectations):
        # every expectation is a (name, type, optional) triple
        for name, expected_type, expected_optional in expectations:
            rule = rules.get(name)
            self.assertIsNotNone(rule)
            _type, optional = rule
            self.assertIs(_type, expected_type)
            self.assertEqual(optional, expected_optional)

    # the "items" entry is a (dict, optional) pair
    items, items_optional = schema.get("items")
    self.assertIsInstance(items, dict)
    self.assertEqual(items_optional, True)
    check_rules(items, [("count", int, True)])

    # "paragraphs" sits inside the items dict, again as a (dict, optional) pair
    paragraphs, paragraphs_optional = items.get("paragraphs")
    self.assertIsInstance(paragraphs, dict)
    self.assertEqual(paragraphs_optional, True)
    check_rules(paragraphs, [("description", str, True)])
def test_child_element_doesnt_exist(self):
    """
    Data for a div whose child element is absent is filtered out, so
    "divs" is expected to hold three entries for this markup.
    """
    element = ElementFactory.from_json(ALL_JSON)
    # four divs, the second of which has no <a> child
    root = html.fragment_fromstring(""" <section> <div><a href=\"#foo\">Foo</a></div> <div></div> <div><a href=\"#baz\">Baz</a></div> <div><a href=\"#quux\">Quux</a></div> </section> """)
    found = element.data(root).get("divs")
    self.assertEqual(len(found), 3)
def test_rule_no_attr(self):
    """
    An element whose data is None is filtered out, so "divs" is expected
    to hold a single entry for this markup.
    """
    element = ElementFactory.from_json(RANGE_JSON)
    # the second <a> has no href attribute
    root = html.fragment_fromstring(""" <section> <div><a href=\"#\">Test</a></div> <div><a>Test</a></div> <div><a href=\"#\">Test</a></div> <div><a href=\"#\">Test</a></div> </section> """)
    found = element.data(root).get("divs")
    self.assertEqual(len(found), 1)
def test_children_data(self):
    """
    Each entry under "divs" carries the child's data: both "link" and
    "headline" must be present.
    """
    markup = """ <section> <div><a href=\"#\">Test</a></div> <div><a>Test</a></div> <div><a href=\"#\">Test</a></div> <div><a href=\"#\">Test</a></div> </section> """
    element = ElementFactory.from_json(RANGE_JSON)
    root = html.fragment_fromstring(markup)
    extracted = element.data(root)
    self.assertIn("divs", extracted)
    for entry in extracted.get("divs"):
        self.assertIn("link", entry)
        self.assertIn("headline", entry)
def test_single(self):
    """The flattened SINGLE_ELEMENT maps "url" and "title" to required str rules."""
    element_json = copy.deepcopy(SINGLE_ELEMENT)
    schema = flatten_element(ElementFactory.from_json(element_json))

    for name, expected_type, expected_optional in [
        ("url", str, False),
        ("title", str, False),
    ]:
        rule = schema.get(name)
        self.assertIsNotNone(rule)
        # each rule is a (type, optional) pair
        _type, optional = rule
        self.assertIs(_type, expected_type)
        self.assertEqual(optional, expected_optional)
def test_all_data_none(self):
    """
    If every element yields None, the "divs" value is expected to be a
    list with no entries.
    """
    element = ElementFactory.from_json(ALL_JSON)
    # none of the <a> tags has an href attribute
    root = html.fragment_fromstring(""" <section> <div><a>Foo</a></div> <div><a>Bar</a></div> <div><a>Baz</a></div> <div><a>Quux</a></div> </section> """)
    extracted = element.data(root)
    found = extracted.get("divs")
    self.assertIsInstance(found, list)
    self.assertEqual(len(found), 0)
def test_all_data_doesnt_exist(self):
    """
    If all matched elements yield None, the "divs" value is expected to
    be a list with no entries.
    """
    element = ElementFactory.from_json(RANGE_JSON)
    # the first two <div>s have no <a> child
    root = html.fragment_fromstring(""" <section> <div></div> <div></div> <div><a href=\"#\">Test</a></div> <div><a href=\"#\">Test</a></div> </section> """)
    extracted = element.data(root)
    found = extracted.get("divs")
    self.assertIsInstance(found, list)
    self.assertEqual(len(found), 0)
def test_children_data(self):
    """
    Child data is merged into every datum: each entry under "divs" must
    contain "link" and "headline".
    """
    element = ElementFactory.from_json(RANGE_JSON)
    root = html.fragment_fromstring(""" <section> <div><a href=\"#\">Test</a></div> <div><a>Test</a></div> <div><a href=\"#\">Test</a></div> <div><a href=\"#\">Test</a></div> </section> """)
    extracted = element.data(root)
    self.assertIn("divs", extracted)
    for entry in extracted.get("divs"):
        for key in ("link", "headline"):
            self.assertIn(key, entry)
def test_optional_single_element(self):
    """
    For an optional SINGLE_ELEMENT, outputs with both keys, only one of
    them, or none at all produce no differences.
    """
    element_json = copy.deepcopy(SINGLE_ELEMENT)
    element_json["optional"] = True
    schema = flatten_element(ElementFactory.from_json(element_json))

    complete = {
        "title": "Nightfall",
        "url": "http://www.isaacasimov.com/nightfall",
    }
    url_only = {"url": "http://www.isaacasimov.com/nightfall"}
    title_only = {"title": "Nightfall"}
    for accepted in (complete, url_only, title_only, {}):
        self.assertIsNone(differences(accepted, schema))
def test_children_data(self):
    """
    Data from child elements is merged into every datum: each entry under
    "divs" must contain "link" and "headline".
    """
    markup = """ <section> <div><a href=\"#foo\">Foo</a></div> <div><a href=\"#bar\">Bar</a></div> <div><a href=\"#baz\">Baz</a></div> <div><a href=\"#quux\">Quux</a></div> </section> """
    element = ElementFactory.from_json(ALL_JSON)
    root = html.fragment_fromstring(markup)
    extracted = element.data(root)
    self.assertIn("divs", extracted)
    for entry in extracted.get("divs"):
        self.assertIn("link", entry)
        self.assertIn("headline", entry)
def test_single_child_single_optional(self):
    """
    An optional ALT_SINGLE_ELEMENT child adds an optional "text" rule
    while the parent's "url" and "title" rules stay required.
    """
    child_json = copy.deepcopy(ALT_SINGLE_ELEMENT)
    child_json["optional"] = True
    parent_json = copy.deepcopy(SINGLE_ELEMENT)
    parent_json["children"].append(child_json)
    schema = flatten_element(ElementFactory.from_json(parent_json))

    expectations = (
        ("url", str, False),
        ("title", str, False),
        ("text", str, True),
    )
    for name, expected_type, expected_optional in expectations:
        rule = schema.get(name)
        self.assertIsNotNone(rule)
        # each rule is a (type, optional) pair
        _type, optional = rule
        self.assertIs(_type, expected_type)
        self.assertEqual(optional, expected_optional)
def test_all(self):
    """
    Flattening ALL_ELEMENT yields a required "items" dict whose "count"
    rule is a required int.
    """
    element = ElementFactory.from_json(copy.deepcopy(ALL_ELEMENT))
    schema = flatten_element(element)

    # the "items" entry is a (dict, optional) pair
    items, items_optional = schema.get("items")
    self.assertIsInstance(items, dict)
    self.assertEqual(items_optional, False)

    for name, expected_type, expected_optional in (("count", int, False),):
        rule = items.get(name)
        self.assertIsNotNone(rule)
        _type, optional = rule
        self.assertIs(_type, expected_type)
        self.assertEqual(optional, expected_optional)
def test_all_child_all(self):
    """
    With ALT_ALL_ELEMENT nested inside ALL_ELEMENT, each item holds its
    "count" plus a nested "paragraphs" list; such an output produces no
    differences.
    """
    child_json = copy.deepcopy(ALT_ALL_ELEMENT)
    parent_json = copy.deepcopy(ALL_ELEMENT)
    parent_json["children"].append(child_json)
    schema = flatten_element(ElementFactory.from_json(parent_json))

    paragraphs = [{"description": "foo"}, {"description": "bar"}]
    valid_output = {"items": [{"count": 6, "paragraphs": paragraphs}]}
    self.assertIsNone(differences(valid_output, schema))
def test_empty_from_json(self):
    """An element description with no rules and no children is rejected with ValueError."""
    single_spec = {"type": "single", "index": 0}
    # nothing to extract: "rules" and "children" are both empty
    ruleless_json = {
        "selector": "div",
        "children": [],
        "rules": [],
        "spec": single_spec,
    }
    with self.assertRaises(ValueError):
        ElementFactory.from_json(ruleless_json)
def test_optional_from_json(self):
    """An "optional": True entry in the JSON is reflected by the element's ``optional`` attribute."""
    # work on a copy so the shared SINGLE_JSON does not gain an "optional" key
    optional_json = SINGLE_JSON.copy()
    optional_json["optional"] = True
    element = ElementFactory.from_json(optional_json)
    self.assertTrue(element.optional)
def test_creates_rule_and_child_objects(self):
    """
    The element built from SINGLE_JSON has exactly one Rule and a spec of
    type "single" at index 0.
    """
    element = ElementFactory.from_json(SINGLE_JSON)
    self.assertEqual(len(element.rules), 1)
    self.assertIsInstance(element.rules[0], Rule)
    for key, expected in (("type", "single"), ("index", 0)):
        self.assertEqual(element.spec.get(key), expected)